Example #1
0
def generate_conjuncts():
    """Write per-script, per-vowel conjunct tables as JSON files.

    The template ``conj`` holds lists of IAST conjunct spellings, all written
    with the vowel ``a``.  For every script in ``scripts`` and every vowel in
    ``vowels``:

    1. every ``a`` in the template text is replaced by the vowel (the JSON
       keys contain no ``a``, so only the spellings change);
    2. each list is reduced to the spellings that come back unchanged from an
       ``IAST -> script -> IAST`` round trip through ``convert``;
    3. the surviving spellings are converted to the script and written to
       ``resources/conjuncts<index>/conjuncts_<script>_<vowel>.json``.

    For ``Sinhala`` and ``Chakma`` a second ``..._all.json`` file is written
    with the ``SinhalaConjuncts`` / ``ChakmaEnableAllConjuncts`` post-options.
    The regular file is always produced without post-options (previously the
    post-options leaked into the regular file of every vowel after the first).

    Progress and the target path are printed for each script/vowel pair.
    """
    # One logical JSON document, kept as adjacent literals so that no string
    # literal spans a physical line break.
    conj = (
        '{"conjuncts1S1":["ak","akh","ag","agh","aṅ","ac","ach","aj","ajh","añ","aṭ","aṭh","aḍ","aḍh","aṇ","at","ath","ad","adh","an","ap","aph","ab","abh","am","ay","ar","al","av","aś","aṣ","as","ah","al̤"],"conjuncts2S1":["kka","kkha","kca","kcha","kṇa","kta","ktha","kna","kpa","kpha","kma","kya","kra","kla","kva","kśa","kṣa","ksa","khkha","khna","khya","khva","gga","ggha","gja","gḍa","gṇa","gda","gdha","gna","gba","gbha","gma","gya","gra","gla","gva","ghna","ghma","ghya","ghra","ghva","ṅka","ṅkha","ṅga","ṅgha","ṅṅa","ṅca","ṅja","ṅta","ṅda","ṅdha","ṅna","ṅpa","ṅbha","ṅma","ṅya","ṅra","ṅva","ṅśa","ṅsa","ṅha","cca","ccha","cña","cma","cya","cra","cva","chya","jja","jjha","jña","jma","jya","jra","jva","jha","jhña","cña","cha","ñja","ñjha","ñña","ñśa","ñha","ṭka","ṭkha","ṭca","ṭcha","ṭṭa","ṭṇa","ṭta","ṭpa","ṭpha","ṭma","ṭya","ṭva","ṭśa","ṭṣa","ṭsa","ṭhya","ḍga","ḍgha","ḍja","ḍḍa","ḍḍha","ḍda","ḍdha","ḍba","ḍbha","ḍma","ḍya","ḍra","ḍla","ḍva","ḍhya","ḍhra","ḍhva","ṇṭa","ṇṭha","ṇḍa","ṇḍha","ṇṇa","ṇna","ṇma","ṇya","ṇva","ṇha","tka","tkha","tta","ttha","tna","tpa","tpha","tma","tya","tra","tva","tṣa","tsa","thna","thya","thra","thva","dga","dgha","dda","ddha","dna","dba","dbha","dma","dya","dra","dva","dhna","dhma","dhya","dhra","dhva","nka","nkha","nga","ngha","nta","ntha","nda","ndha","nna","npa","npha","nba","nbha","nma","nya","nra","nva","nṣa","nsa","nha","pka","pkha","pca","pcha","pṭa","pṇa","pta","pna","ppa","ppha","pma","pya","pra","pla","pva","pśa","psa","bga","bja","bda","bdha","bba","bbha","bya","bra","bla","bva","bhṇa","bhna","bhma","bhya","bhra","bhla","bhva","mṇa","mna","mpa","mpha","mba","mbha","mma","mya","mra","mla","mva","mha","yya","yva","rka","rka","hra","gra","gha","rca","rcha","rja","rja","hra","ṭra","ḍra","ḍha","rṇa","rta","rtha","rda","rdha","rna","rpa","rpha","rba","rbha","rma","rya","rla","rva","rśa","rṣa","rsa","rha","lka","lga","lda","lpa","lpha","lba","lbha","lma","lya","lla","lva","lśa","lha","vṇa","vna","vya","vra","vla","śca","ścha","śna","'
        'śpa","śma","śya","śra","śla","śva","śśa","ṣka","ṣkha","ṣṭa","ṣṭha","ṣṇa","ṣpa","ṣpha","ṣma","ṣya","ṣra","ṣva","ṣṣa","ska","skha","sta","stha","sna","spa","spha","sma","sya","sra","sva","ssa","hṇa","hna","hma","hya","hra","hla","hva","l̤ha"],"conjuncts3S1":["kkra","kkla","kkva","kkṣa","ktya","ktra","ktva","kthna","kthya","knya","kpra","kpla","kmya","krya","klya","kśma","kśra","kśla","kśva","kṣṇa","kṣma","kṣya","kṣra","kṣva","ksta","kstha","ksna","kspa","kspha","ksma","ksya","ksra","ksva","ggra","gghya","gghra","gjña","gjya","gjva","gdya","gdra","gdva","gdhya","gdhra","gdhva","gnya","gbra","gbhya","gbhra","gmya","grya","grva","gvya","gvra","ghnya","ghrya","ghvya","ṅkta","ṅktha","ṅkya","ṅkra","ṅkla","ṅkva","ṅkṣa","ṅksa","ṅkhya","ṅgdha","ṅgya","ṅgra","ṅgva","ṅghna","ṅghya","ṅghra","ṅtra","ṅtva","ṅdhya","ṅnya","ṅnra","ṅpra","ṅvya","ṅvra","ṅsva","ccya","cchma","cchya","cchra","cchla","cchva","cñya","jjña","jjya","jjva","jjhya","jñya","jñva","jmya","jrya","jvya","ñcma","ñcya","ñcva","ñchna","ñchya","ñchra","ñchla","ñchva","ñjña","ñjma","ñjya","ñjva","ñśma","ñśya","ñśra","ñśla","ñśva","ṭkra","ṭkṣa","ṭṭya","ṭtra","ṭtva","ṭpra","ṭśra","ṭśla","ṭsta","ṭstha","ṭsna","ṭspa","ṭsva","ḍgya","ḍgra","ḍghra","ḍjña","ḍjya","ḍḍhya","ḍḍhva","ḍdva","ḍbra","ḍbhya","ḍbhra","ḍvya","ṇṭya","ṇṭhya","ṇḍḍha","ṇḍya","ṇḍra","ṇḍva","ṇḍhya","ṇḍhra","ṇvya","tkya","tkra","tkla","tkva","tkṣa","tkhya","ttna","ttma","ttya","ttra","ttva","ttsa","tthya","tnya","tnva","tpra","tpla","tmya","tyva","trya","trva","tvya","tska","tskha","tsta","tstha","tsna","tspa","tspha","tsma","tsya","tsra","tsva","thnya","thvya","dgra","dgla","dghna","dghra","ddya","ddra","ddva","ddhma","ddhya","ddhra","ddhva","dbra","dbhya","dbhra","dbhva","dmya","drya","drva","dvya","dvra","dhnya","dhrya","dhvya","dhvra","nkra","nkla","nkva","nkṣa","nkhya","ngra","ngla","nghna","nghra","ntta","nttha","ntma","ntya","ntra","ntva","ntsa","nthya","nddha","ndma","ndya","ndra","ndva","ndhma","ndhya","ndhra","ndhva","nnya","nnva","npra","npla","npsa'
        '","nbra","nbhra","nmya","nmra","nmla","nyva","nvya","nvra","nska","nskha","nsta","nstha","nsna","nspa","nspha","nsma","nsya","nsra","nsva","nhya","nhra","nhva","pkṣa","ptya","ptra","ptva","pnya","ppra","prya","pśya","psna","psya","psva","bgra","bjya","bdya","bdhya","bdhva","bbra","bbhya","bvya","bhrya","bhrva","bhvya","mnya","mpya","mpra","mpla","mpsa","mbya","mbra","mbva","mbhya","mbhra","mmya","mmra","mmla","mrya","rkca","rkta","rktha","rkpa","rkya","rkṣa","rksa","rkhya","rgga","rggha","rgja","rgbha","rgya","rgra","rgla","rgva","rghna","rghya","rghra","rṅkha","rṅga","rccha","rcya","rjña","rjma","rjya","rjva","rñja","rḍya","rḍhya","rṇṇa","rṇya","rṇva","rtta","rtna","rtma","rtya","rtra","rtva","rtsa","rthya","rddha","rdma","rdya","rdra","rdva","rdhna","rdhma","rdhya","rdhra","rdhva","rnya","rnva","rpya","rbra","rbhya","rbhra","rbhva","rmya","rmra","rmla","ryya","rvya","rvra","rvla","rśma","rśya","rśva","rṣṭa","rṣṭha","rṣṇa","rṣma","rṣya","rṣva","rsra","rsva","rhya","rhra","rhla","rhva","lkya","lgva","lpya","lbya","lbhya","llya","lvya","lhya","vnya","ścya","śnya","śmya","śrya","śrva","śvya","ṣkya","ṣkra","ṣkla","ṣkva","ṣkṣa","ṣṭya","ṣṭra","ṣṭva","ṣṭhya","ṣṭhva","ṣṇya","ṣṇva","ṣpya","ṣpra","ṣpla","ṣmya","skra","stma","stya","stra","stva","stsa","sthna","sthya","snya","spra","sphya","smya","srya","svya","ssya","ssva","hnya","hmya","hvya"],"conjuncts4S1":["ktrya","ktvya","kṣṇya","kṣmya","kstra","gdvya","gdhrya","ṅktya","ṅktra","ṅktva","ṅkṣṇa","ṅkṣma","ṅkṣya","ṅkṣva","ṅgdhya","ṅgdhva","ṅghrya","tkṣma","tkṣva","ttrya","tstra","tsthya","tspra","tsphya","ddvya","nttva","ntrya","ntvya","ntsta","ntstha","ntsna","ntspa","ntsya","ntsra","ntsva","nddhya","nddhva","ndrya","ndvya","ndhrya","nstra","nsphya","ptrya","psnya","rkṣṇa","rkṣya","rksva","rṅgya","rjmya","rttra","rtnya","rtrya","rtvya","rtsna","rtsya","rddhya","rdrya","rdvya","rdhnya","rśvya","rṣṭya","rṣṇya","lgvya","ṣṭrya","strya","sthnya"],"conjuncts5S1":["rtsnya"]}'
    )
    vowels = ['a', 'ā', 'i', 'ī', 'u', 'ū', 'ṛ', 'ĕ', 'e', 'ai', 'ŏ', 'o', 'au', 'aṃ', 'aḥ']

    scripts = ["Wancho", "Mro", "Marchen", "Sharada"]

    # Post-options used only for the additional "*_all.json" output.
    all_conjunct_options = ['SinhalaConjuncts', 'ChakmaEnableAllConjuncts']

    total = len(scripts) * len(vowels)
    done = 0

    for script1 in scripts:
        # Output is split over two directories: scripts whose first three
        # letters compare below 'Mod' go to conjuncts1, the rest to conjuncts2.
        index = '1' if script1[0:3] < 'Mod' else '2'

        for vowel in vowels:
            done += 1
            print('Processing ' + str(done) + ' out of ' + str(total))

            # Re-spell the whole template with the current vowel.
            conj2 = json.loads(conj.replace('a', vowel))

            # The round-trip filter does not depend on the post-options, so
            # it is computed once and shared by both output files.
            survivors = {
                key: _round_trip_conjuncts(script1, value)
                for key, value in conj2.items()
            }

            base_path = ("resources/conjuncts" + index + "/conjuncts_"
                         + script1 + "_" + vowel)

            print(base_path + ".json")
            _write_conjunct_file(base_path + ".json", script1, survivors, [])

            if script1 in ('Sinhala', 'Chakma'):
                _write_conjunct_file(base_path + "_all.json", script1,
                                     survivors, all_conjunct_options)


def _round_trip_conjuncts(script1, candidates):
    """Return the candidates that survive IAST -> script1 -> IAST, in input order."""
    in_script = list(unique_everseen(
        [convert('IAST', script1, x, False, [], []) for x in candidates]))
    back_in_iast = set(
        [convert(script1, 'IAST', x, False, ['removeChillus'], [])
         for x in in_script])

    # Position of the first occurrence of each spelling; gives the same
    # ordering as ``key=candidates.index`` without a linear scan per item.
    first_position = {}
    for position, item in enumerate(candidates):
        first_position.setdefault(item, position)

    return sorted(set(candidates) & back_in_iast,
                  key=first_position.__getitem__)


def _write_conjunct_file(path, script1, survivors, postoptions):
    """Convert each surviving spelling to script1 and dump the table to path as JSON."""
    results = {
        key: [convert('IAST', script1, x, False, [], postoptions)
              for x in items]
        for key, items in survivors.items()
    }
    # The context manager closes the file even if serialisation fails.
    with io.open(path, mode="w", encoding="utf-8") as f:
        f.write(json.dumps(results, ensure_ascii=False, sort_keys=True,
                           indent=4))
def generate_syllables():
    """Build a syllabary per script and write it as JSON.

    For every script in ``scripts_syllabary`` the function:

    1. converts the HK vowel and consonant inventories to the script and
       back through ``convert`` and keeps only the items that come back
       unchanged;
    2. combines every retained consonant with every retained vowel (except
       ``a``), plus the bare consonant and the consonant prefixed with ``&``;
    3. converts those combinations to the script, drops the ones that are
       identical to a plain consonant, and
    4. writes ``vowels``, ``consonants`` and ``compounds`` to
       ``resources/syllabary/syllabary_<script>.json``.

    Intermediate lists are printed as the run progresses.
    """
    # Loop-invariant inventories (HK spellings).
    vowelsAll = [
        'a', 'A', 'i', 'I', 'u', 'U', 'R', 'RR', 'lR', 'lRR', 'E', 'e',
        'ai', 'O', 'o', 'au', 'aE', 'AE', 'aO', 'aM', 'aH', 'a~'
    ]
    base_consonants = [
        'k', 'kh', 'g', 'gh', 'G', 'c', 'ch', 'j', 'jh', 'J', 'T', 'Th',
        'D', 'Dh', 'N', 't', 'th', 'd', 'dh', 'n', 'p', 'ph', 'b', 'bh',
        'm', 'y', 'r', 'l', 'v', 'z', 'S', 's', 'h', 'Z', 'L', 'r2', 'n2',
        'q', 'qh', 'g2', 'z2', 'r3', 'r3h', 'f', 'Y'
    ]
    # Vowels appended unconditionally for particular scripts, regardless of
    # what the round trip retained.
    extra_vowels = {
        'BatakSima': ['e', 'o', 'au'],
        'BatakManda': ['e', 'o'],
        'BatakPakpak': ['e', 'aE', 'o'],
        'BatakToba': ['e', 'o'],
        'BatakKaro': ['e', 'aE', 'o', 'aO'],
        'Khojki': ['I'],
    }

    results = {}
    scripts_syllabary = ["Kaithi"]

    for k, script1 in enumerate(scripts_syllabary, 1):
        print(script1)
        print('Syllabary ' + str(k))

        pp = 'RemoveDiacriticsTamil' if script1 == 'Tamil' else 'RemoveDiacritics'

        # --- vowels: keep those that round-trip HK -> script -> HK ---------
        vowelsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp]) for x in vowelsAll
            ]))
        vowelsScript1_hk = [
            convert(script1, 'HK', x, False, [], []).strip()
            for x in vowelsScript1
        ]
        actual_vowels = sorted(set(vowelsAll) & set(vowelsScript1_hk),
                               key=vowelsAll.index)

        print(actual_vowels)

        if script1 == 'Tamil':
            actual_vowels = [
                x for x in actual_vowels if x not in ['aE', 'AE', 'aO']
            ]

        vowelsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp])
                for x in actual_vowels
            ]))
        vowelsUnique = [
            convert(script1, 'HK', x, False, [], []).strip()
            for x in vowelsScript1
        ]

        # --- consonants: same round trip, with an inherent 'a' attached ----
        consonantsAll = base_consonants
        if script1 == 'Sinhala' or script1 == 'Rejang':
            consonantsAll = consonantsAll + ['n*g', 'n*j', 'n*D', 'n*d', 'm*b']

        consonantsScript1 = list(
            unique_everseen([
                convert('HK', script1, x + 'a', False, [], [pp])
                for x in consonantsAll
            ]))
        consonantsScript1_hk = map(removeA, [
            convert(script1, 'HK', x, False, [], []).strip()
            for x in consonantsScript1
        ])
        actual_consonants = sorted(set(consonantsAll)
                                   & set(consonantsScript1_hk),
                                   key=consonantsAll.index)

        print(actual_consonants)

        consonantsScript1 = list(
            unique_everseen([
                convert('HK', script1, x + 'a', False, [], [pp]).strip()
                for x in actual_consonants
            ]))

        consonantUnique = [
            convert(script1, 'HK', x, False, [], [pp]).strip().replace('a', '')
            for x in consonantsScript1
        ]

        print(consonantUnique)

        # --- compounds ------------------------------------------------------
        vowelsUnique.extend(extra_vowels.get(script1, []))

        # 'a' is excluded here; the bare consonant is appended separately in
        # the loop below.
        vowelsUnique = [
            x for x in sorted(vowelsUnique, key=vowelsAll.index) if x != 'a'
        ]

        print(vowelsUnique)

        compound = []
        for cons in consonantUnique:
            compound.extend(cons + vow for vow in vowelsUnique)
            compound.append(cons)
            compound.append('&' + cons)

        if script1 == 'Multani' or script1 == 'Mahajani':
            compound = []

        print(compound)
        compoundsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp]) for x in compound
            ]))
        print(compoundsScript1)

        # Drop compounds that render exactly like a plain consonant.
        plain_consonants = set(consonantsScript1)
        compoundsScript1 = [
            x for x in compoundsScript1 if x not in plain_consonants
        ]

        print(compoundsScript1)

        results['vowels'] = vowelsScript1
        results['consonants'] = consonantsScript1
        results['compounds'] = compoundsScript1

        print(results)

        # The context manager closes the file even if serialisation fails.
        with io.open("resources/syllabary/syllabary_" + script1 + ".json",
                     mode="w",
                     encoding="utf-8") as f:
            f.write(
                json.dumps(results, ensure_ascii=False, sort_keys=True,
                           indent=4))