def dump_text(base_dir, do_transliteration=False):
    """Download the shatapatha-brAhmaNa from the TITUS site and dump one markdown file per adhyAya.

    base_dir: output root; files land at base_dir/<kaanda>/<adhyaaya>.md.
    do_transliteration: if True, convert the scraped romanization to devanAgarI.
    Existing output files are skipped, so an interrupted dump can be resumed.
    """
    unit_info_file = os.path.join(os.path.dirname(text_data.__file__), "shatapatha.json")
    titus_url = "http://titus.uni-frankfurt.de/texte/etcs/ind/aind/ved/yvw/sbm/sbm.htm"
    for kaanda_index in text_data.get_subunit_list(json_file=unit_info_file, unit_path_list=[]):
        sarga_list = text_data.get_subunit_list(json_file=unit_info_file, unit_path_list=[kaanda_index])
        for sarga_index in sarga_list:
            logging.info("kaanDa %d adhyaaya %d", kaanda_index, sarga_index)
            outfile_path = os.path.join(base_dir, "%02d" % (kaanda_index), "%02d" % sarga_index + ".md")
            if os.path.exists(outfile_path):
                # Resumability: don't re-scrape what we already have.
                logging.info("Skipping " + outfile_path)
                continue
            titus.navigate_to_part(base_page_url=titus_url, level_3_id=kaanda_index, level_4_id=sarga_index)
            sentences = titus.get_text()
            lines = ["\n"]
            for sentence in sentences:
                sentence = roman.RomanScheme.simplify_accent_notation(sentence)
                sentence = sentence.replace("/", ".")
                if not sentence.endswith("."):
                    sentence = sentence + ".."
                if do_transliteration:
                    # NOTE(review): kANDa 12 is apparently encoded in IAST on TITUS,
                    # the rest in TITUS notation — confirm against the site.
                    if kaanda_index == 12:
                        sentence = sanscript.transliterate(sentence, sanscript.IAST, sanscript.DEVANAGARI)
                    else:
                        sentence = sanscript.transliterate(sentence, sanscript.TITUS, sanscript.DEVANAGARI)
                    sentence = roman.RomanScheme.to_shatapatha_svara(sentence)
                lines.append(sentence + " \n")
            os.makedirs(name=os.path.dirname(outfile_path), exist_ok=True)
            with open(outfile_path, "w") as outfile:
                outfile.writelines(lines)
def test_devanaagarii_equivalence():
    """Check that synonymous ITRANS spellings map to the same devanAgarI."""
    spelling_a = "rAmo gUDhaM vaktI~Ngitaj~naH kShetre"
    spelling_b = "raamo guuDhaM vaktii~NgitaGYaH xetre"
    result_a = sanscript.transliterate(spelling_a, sanscript.ITRANS, sanscript.DEVANAGARI)
    logging.info(result_a)
    result_b = sanscript.transliterate(spelling_b, sanscript.ITRANS, sanscript.DEVANAGARI)
    assert result_a == result_b
def get_headers(self):
    """Return lookup headers for this gaNa-varga, in devanAgarI and ITRANS forms."""
    indexed_naama = NIGHANTU_NAAMA + " - _" + self.index
    return [
        self.gana_varga_naama,
        indexed_naama,
        transliterate(self.gana_varga_naama, DEVANAGARI, ITRANS),
        transliterate(indexed_naama, DEVANAGARI, ITRANS),
    ]
def get_file_path(out_dir, title_iast, author_iast="", catalog_number=""):
    """Return out_dir/<title>_<author>_<catalog>.md with title/author rendered in OPTITRANS."""
    name_parts = (
        sanscript.transliterate(data=title_iast, _from=sanscript.IAST, _to=sanscript.OPTITRANS),
        sanscript.transliterate(data=author_iast, _from=sanscript.IAST, _to=sanscript.OPTITRANS),
        catalog_number.strip(),
    )
    raw_path = "%s_%s_%s.md" % name_parts
    cleaned_path = file_helper.clean_file_path(file_path=raw_path)
    return os.path.join(out_dir, cleaned_path)
def get_item(id, dir_path):
    """Scrape one dashaka of the nArAyaNIyam from sa.wikisource and write it to dir_path/<id>.md.

    Relies on a module-level selenium `browser`; writes a per-shloka audio-embed
    div before each shloka, then sets the file title via md_helper.
    """
    import urllib.parse
    dashaka_id = "नारायणीयम्/दशकम्_%s" % sanscript.transliterate(
        str(id), sanscript.SLP1, sanscript.DEVANAGARI)
    logging.info(dashaka_id)
    item_url = "https://sa.wikisource.org/wiki/" + urllib.parse.quote(
        dashaka_id)
    logging.info(item_url)
    browser.get(item_url)
    text = browser.find_element_by_css_selector("div.poem").text
    # The source text contains a stray roman "c" standing for च्: fuse it with a
    # following independent vowel into the correct syllable, and normalize ळ to ल.
    text = text.replace("cअ", "च").replace("cइ", "चि").replace(
        "cई", "ची").replace("cउ", "चु").replace("cऊ", "चू").replace(
        "cऋ", "चृ").replace("cॠ", "चॄ").replace("cऌ", "चॢ").replace(
        "cॡ", "चॣ").replace("cए", "चे").replace("cऐ", "चै").replace(
        "cओ", "चो").replace("cऔ", "चौ").replace("c", "च्").replace("ळ", "ल")
    # Shlokas are separated by blank lines in the wikisource poem div.
    shlokas = text.split("\n\n")
    outfile_path = os.path.join(dir_path, "%03d.md" % id)
    os.makedirs(name=os.path.dirname(outfile_path), exist_ok=True)
    with open(outfile_path, "w") as outfile:
        for shloka_id in range(1, len(shlokas) + 1):
            outfile.write(
                "<div class=\"audioEmbed\" caption=\"सीतालक्ष्मी-वाचनम्\" src=\"https://sanskritdocuments.org/sites/completenarayaneeyam/SoundFiles/%03d/%03d_%02d.mp3\"></div> \n" % (id, id, shloka_id))
            # Trailing "  \n" (two spaces) forces a markdown line break per verse line.
            outfile.writelines(shlokas[shloka_id - 1].replace("\n", " \n") + "\n\n")
    md_file = md_helper.MdFile(file_path=outfile_path)
    md_file.set_title(sanscript.transliterate("%03d" % id, sanscript.SLP1, sanscript.DEVANAGARI), dry_run=False)
def fix_lazy_anusvaara(self, data_in, omit_sam=False, omit_yrl=False, ignore_padaanta=False):
    """Fix lazy anusvAra usage by round-tripping through devanAgarI, whose scheme implements the fixer."""
    from indic_transliteration import sanscript
    if ignore_padaanta:
        return self.fix_lazy_anusvaara_except_padaantas(data_in=data_in, omit_sam=omit_sam, omit_yrl=omit_yrl)
    devanagari_text = sanscript.transliterate(data=data_in, _from=self.name, _to=sanscript.DEVANAGARI)
    fixed_text = sanscript.SCHEMES[sanscript.DEVANAGARI].fix_lazy_anusvaara(
        data_in=devanagari_text, omit_sam=omit_sam, omit_yrl=omit_yrl)
    return sanscript.transliterate(data=fixed_text, _from=sanscript.DEVANAGARI, _to=self.name)
def get_storage_name(text, source_script=None, max_length=50, maybe_use_dravidian_variant=True, mixed_languages_in_titles=True):
    """Derive a filesystem-friendly storage name (OPTITRANS) from a title string.

    text: the title; "/" separators (with trailing spaces) become "__".
    source_script: auto-detected via indic_transliteration.detect when None.
    max_length: truncate the final name to this many characters (None = no limit).
    """
    from indic_transliteration import detect
    if source_script is None:
        source_script = detect.detect(text=text)
    text_optitrans = regex.sub("/ *", "__", text)
    if source_script in roman.ALL_SCHEME_IDS:
        if source_script in roman.CAPITALIZABLE_SCHEME_IDS:
            if mixed_languages_in_titles:
                # Wrap non-indic words in <...> so transliteration can suspend over them.
                text_optitrans = sanscript.SCHEMES[
                    sanscript.IAST].mark_off_non_indic_in_line(text_optitrans)
            text_optitrans = sanscript.transliterate(
                text_optitrans, source_script, sanscript.OPTITRANS,
                suspend_on=set('<'), suspend_off=set('>'),
                maybe_use_dravidian_variant=maybe_use_dravidian_variant)
        else:
            text_optitrans = sanscript.transliterate(
                text_optitrans, source_script, sanscript.OPTITRANS,
                maybe_use_dravidian_variant=maybe_use_dravidian_variant)
    # NOTE(review): non-roman (indic-script) input falls through untransliterated —
    # confirm that is intended before relying on it.
    storage_name = clean_file_path(text_optitrans)
    if max_length is not None:
        storage_name = storage_name[:max_length]
    return storage_name
def extractFirstPage(im, coords):
    """
    im: Image object
    coords: List of size-4 tuples. Order:
        Box-1 containing mandal name, pin code etc.
        Box-2 polling station name
        Box-3 polling station address
        Box-4 net electors male
        Box-5 net electors female
        Box-6 net electors third gender
        Box-7 total electors
    """
    im.seek(0)
    resAll = dealWithFirstPage(im, coords)
    ocr_lines = [line for line in resAll[0].split("\n") if not line == '']

    def _label_value(line):
        # OCR sometimes renders the ':' separator as the devanagari visarga 'ः'.
        if len(line.split(":")) == 1:
            return line.split("ः")[1].strip()
        return line.split(":")[1].strip()

    main_town = _label_value(ocr_lines[0])
    police_station = _label_value(ocr_lines[5])
    pin_code = _label_value(ocr_lines[8])
    main_town_eng = transliterate(main_town, sanscript.DEVANAGARI, sanscript.ITRANS)
    police_station_eng = transliterate(police_station, sanscript.DEVANAGARI, sanscript.ITRANS)
    polling_station_name = transliterate(resAll[1], sanscript.DEVANAGARI, sanscript.ITRANS)
    polling_station_address = transliterate(resAll[2], sanscript.DEVANAGARI, sanscript.ITRANS)
    return [
        main_town_eng, police_station_eng, pin_code, polling_station_name,
        polling_station_address, resAll[3], resAll[4], resAll[5], resAll[6],
        main_town, police_station, resAll[1], resAll[2]
    ]
def get_headers(self):
    """Return lookup headers for this varga in devanAgarI and ITRANS forms."""
    index_dev = transliterate(self.index, ITRANS, DEVANAGARI)
    nighantu_it = transliterate(NIGHANTU_NAAMA, DEVANAGARI, ITRANS)
    return [
        self.varga_naama,
        NIGHANTU_NAAMA + ' _' + index_dev,
        transliterate(self.varga_naama, DEVANAGARI, ITRANS),
        nighantu_it + ' _' + self.index,
        'abdm _' + self.index,
    ]
def transliterate(text, toDevanagari=False):
    """Transliterate between IAST and devanAgarI.

    text: input string.
    toDevanagari: when True, convert IAST input to devanAgarI;
        otherwise convert devanAgarI input to IAST.

    Bug fix: the two branches were swapped — with toDevanagari=True the old
    code converted *from* devanAgarI *to* IAST, contradicting the flag's name.
    """
    if toDevanagari:
        return sanscript.transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)
def get_headers(self):
    """Return headers for this shloka; empty when no shloka number is set."""
    if not self.shloka_sankhya:
        return []
    sankhya_it = transliterate(self.shloka_sankhya, DEVANAGARI, ITRANS)
    headers = [NIGHANTU_NAAMA + ' ' + self.shloka_sankhya]
    headers.append(transliterate(NIGHANTU_NAAMA, DEVANAGARI, ITRANS) + ' ' + sankhya_it)
    headers.append('abdm {}'.format(sankhya_it))
    return headers
def get_standard_form(self, data):
    """Roman schemes define multiple representations of the same devanAgarI
    character. This method gets a library-standard representation.

    data : a text in the given scheme.
    """
    if self.synonym_map is None:
        return data
    from indic_transliteration import sanscript
    # Round-trip through devanAgarI to collapse synonymous spellings.
    devanagari_form = sanscript.transliterate(data=data, _from=self.name, _to=sanscript.DEVANAGARI)
    return sanscript.transliterate(data=devanagari_form, _from=sanscript.DEVANAGARI, _to=self.name)
def test_devanaagarii_equivalence(self):
    """Test all synonymous transliterations.

    Fix: the body used the Python-2-only `print expr,` statement, which is a
    SyntaxError on Python 3; replaced with the print() function.
    """
    result_itrans = S.transliterate("rAmo gUDhaM vaktI~Ngitaj~naH kShetre", S.ITRANS, S.DEVANAGARI)
    print(result_itrans)
    self.assertEqual(
        result_itrans,
        S.transliterate("raamo guuDhaM vaktii~NgitaGYaH xetre", S.ITRANS, S.DEVANAGARI))
def repl(matchobject):
    """re.sub callback: replace anusvAra (M in SLP1) with the nasal homorganic
    to the following consonant, per varga.

    The matched devanAgarI text is converted to SLP1, the substitutions are
    applied, and the result is converted back to devanAgarI.

    Fix: replacement strings now use raw literals — '\\g<1>' in a plain string
    is an invalid escape sequence (SyntaxWarning on modern Python).
    """
    data = transliterate(matchobject.group(0), 'devanagari', 'slp1')
    data = re.sub('M([kKgG])', r'N\g<1>', data)   # velar
    data = re.sub('M([cCjJ])', r'Y\g<1>', data)   # palatal
    data = re.sub('M([wWqQ])', r'R\g<1>', data)   # retroflex
    data = re.sub('M([tTdD])', r'n\g<1>', data)   # dental
    data = re.sub('M([pPbB])', r'm\g<1>', data)   # labial
    data = transliterate(data, 'slp1', 'devanagari')
    return data
def separate_rks(dry_run=False):
    """Split per-sUkta markdown files of the atharva-veda into per-Rk files, then
    rewrite each sUkta file as a sequence of js_include divs pointing at them.

    NOTE(review): `dest_dir_suuktas` is referenced below but never defined in this
    function — presumably a module-level constant; confirm it exists before running.
    """
    dest_dir_Rks = "/home/vvasuki/vvasuki-git/vedAH/static/atharva/shaunakam/rUDha-saMhitA/mUlam/"
    suukta_paths = glob.glob("/home/vvasuki/vvasuki-git/vedAH/content/atharva/shaunakam/rUDha-saMhitA_alt/*/*.md", recursive=True)
    for suukta_path in suukta_paths:
        md_file = MdFile(file_path=suukta_path)
        [metadata, md] = md_file.read_md_file()
        lines = md.split("\n")
        # Svara marks (॒ / ॑) signal the start of the accented Rk text;
        # everything before the first such line is introductory metadata.
        meta_lines = list(itertools.takewhile(lambda line: "॒" not in line and "॑" not in line, lines))
        lines = list(itertools.dropwhile(lambda line: "॒" not in line and "॑" not in line, lines))
        lines = [line for line in lines if line != ""]
        rk_id = 0
        chapter_id = suukta_path.split("/")[-2]
        suukta_id = metadata["title"].split()[0]
        suukta_id_roman = sanscript.transliterate(suukta_id, sanscript.DEVANAGARI, sanscript.IAST)
        suukta_title = " ".join(metadata["title"].split()[1:]).replace("।", "").strip()
        dest_path_suukta = os.path.join(dest_dir_suuktas, chapter_id, suukta_id_roman + ".md")
        rk_map = {}
        while(len(lines) > 0):
            # A Rk runs up to (and including) the line containing the ॥ terminator.
            lines_rk = list(itertools.takewhile(lambda line: "॥" not in line, lines))
            lines_rk.append(lines[len(lines_rk)])
            if len(lines) == len(lines_rk):
                lines = []
            else:
                lines = lines[len(lines_rk):]
            rk_id = rk_id + 1
            rk_md = "\n".join(lines_rk)
            rk_id_str = sanscript.transliterate("%02d" % rk_id, sanscript.IAST, sanscript.DEVANAGARI)
            from doc_curation import text_data
            title_Rk = text_data.get_rk_title(rk_id=rk_id_str, rk_text=rk_md)
            dest_path_Rk = os.path.join(dest_dir_Rks, chapter_id, suukta_id_roman, sanscript.transliterate(rk_id_str, sanscript.DEVANAGARI, sanscript.IAST) + ".md")
            md_file_Rk = MdFile(file_path=dest_path_Rk)
            md_file_Rk.dump_to_file(metadata={"title": title_Rk}, content=rk_md, dry_run=dry_run)
            md_file_Rk.set_filename_from_title(transliteration_source=sanscript.DEVANAGARI, dry_run=dry_run)
            rk_map[rk_id_str] = md_file_Rk.file_path
        suukta_md = ""
        for rk_id in sorted(rk_map.keys()):
            dest_path_Rk = rk_map[rk_id]
            # Repo paths are rewritten to site-relative URLs for the js_include mechanism.
            suukta_md = suukta_md + """
<div class="js_include" url="%s" newLevelForH1="2" includeTitle="false"> </div>
""" % dest_path_Rk.replace("/home/vvasuki/vvasuki-git", "").replace("static/", "")
        import textwrap
        suukta_md = """
## परिचयः
%s

## पाठः
%s
""" % ("\n ".join(meta_lines), suukta_md)
        md_file_suukta = MdFile(file_path=dest_path_suukta)
        md_file_suukta.dump_to_file(metadata={"title": "%s %s" % (suukta_id, suukta_title)}, content=textwrap.dedent(suukta_md), dry_run=dry_run)
        md_file_suukta.set_filename_from_title(transliteration_source=sanscript.DEVANAGARI, dry_run=dry_run)
def tessBox(box_lst):
    """
    box_lst : Input format : A list of lists, with each inner list being of the format:
        0: Main box
        1: voterIDBox
        2: boxNumber
        3: Pagenumber

    Returns (rows, indicesWithErrors): parsed voter rows, plus the indices of
    boxes whose OCR output could not be parsed.

    Fix: the bare `except:` (which also swallowed SystemExit/KeyboardInterrupt)
    is narrowed to `except Exception`.
    """
    rows = []
    st = time.time()
    indicesWithErrors = []
    resLst = picEater(box_lst)
    for i, rex in enumerate(resLst):
        op = rex[0].communicate()[0].decode('utf-8')
        voter_id = rex[1].communicate()[0].decode('utf-8')
        voter_id = dealWithID(voter_id)
        boxNumber = rex[2].communicate()[0].decode('utf-8')
        print("box number :" + str(i))
        try:
            res = [line for line in op.split("\n") if not line == '']
            # OCR sometimes renders the ':' separator as the devanagari visarga 'ः'.
            name_regional = (res[0].split("ः")[1] if len(res[0].split(":")) == 1
                             else res[0].split(":")[1]).strip()
            husband_or_father_regional = (res[1].split("ः")[1] if len(
                res[0].split(":")) == 1 else res[1].split(":")[1]).strip()
            house_number = ''
            if (len(res[2].split(":")) > 1):
                house_number = res[2].split(":")[1].strip()
            elif (len(res[2].split("ः")) > 1):
                house_number = res[2].split("ः")[1].strip()
            house_number = dealWithHouses(house_number)
            has_husband = 1 if (
                res[1].split("ः")[0] if len(res[0].split(":")) == 1 else
                res[1].split(":")[0]).split(" ")[0].strip() == "पति" else 0
            age = re.search('[0-9]+', res[len(res) - 1]).group(0)
            if (int(age) < 18):
                # Implausible age => treat this box as an OCR failure.
                raise ValueError()
            intermediate = res[len(res) - 1].split(" ")
            gender = 'F' if re.match('म', intermediate[
                len(intermediate) - 1]) != None else 'M'
            # 3 for other states, 4 for UK.
            opName = transliterate(name_regional, sanscript.DEVANAGARI, sanscript.ITRANS)
            opHusband_or_father = transliterate(husband_or_father_regional,
                                                sanscript.DEVANAGARI, sanscript.ITRANS)
            rows.append([
                opName, age, gender, opHusband_or_father, has_husband,
                house_number, voter_id, boxNumber, box_lst[i][3],
                name_regional, husband_or_father_regional
            ])
        except Exception:
            indicesWithErrors.append(i)
            print("Had Errors.")
    return rows, indicesWithErrors
def fix_lazy_anusvaara(self, data_in):
    """Fix lazy anusvAra by round-tripping through ITRANS, whose scheme implements the actual fixer."""
    from indic_transliteration import sanscript
    as_itrans = sanscript.transliterate(data=data_in, _from=self.name, _to=sanscript.ITRANS)
    as_itrans = sanscript.SCHEMES[sanscript.ITRANS].fix_lazy_anusvaara(data_in=as_itrans)
    return sanscript.transliterate(data=as_itrans, _from=sanscript.ITRANS, _to=self.name)
def equal_dvng(self, w1, w2):
    """Compare two devanAgarI words, treating a word-final anusvAra and a
    word-final म् as equivalent.

    Fix: the old `ma.decode('utf-8')` is Python-2-only; on Python 3 `str` has
    no .decode, so every call raised AttributeError. On Python 3 the literal
    is already a unicode string, so no decoding is needed.
    """
    ma = "म्"
    if not (w1.endswith(ma) or w2.endswith(ma)):
        return w1 == w2
    w1 = sanscript.transliterate(w1, sanscript.DEVANAGARI, sanscript.SLP1)
    w2 = sanscript.transliterate(w2, sanscript.DEVANAGARI, sanscript.SLP1)
    # In SLP1, anusvAra is 'M' and म् is 'm'; normalize both endings to 'm'.
    w1 = re.sub(r'[mM]$', 'm', w1)
    w2 = re.sub(r'[mM]$', 'm', w2)
    return w1 == w2
def convert(text, intran, outtran):
    """Convert a text from intran to outtran transliteration.

    Danda-like '|' characters in the transliterated output become '.'.
    Returns text unchanged when the two schemes are identical.

    Fix: the sys.version_info branch was redundant — u'|'/u'.' and '|'/'.'
    denote the same ASCII replacement on both Python 2 and 3, so a single
    code path suffices.
    """
    if intran == outtran:
        return text
    return sanscript.transliterate(text, intran, outtran).replace('|', '.')
def test_from_devanagari(test_case):
    """Round-trip check: devanAgarI -> script must yield the expected text, or
    at least convert back to the original devanAgarI losslessly.

    Fix: `script in "dev"` was a substring test — true for "d", "e", "v",
    "de", "ev" and "" — not the intended equality check.
    """
    dev_string = test_case["dev_string"]
    script = test_case["script"]
    expected_text = test_case["text"]
    if script in SCRIPT_NAME_MAP.keys():
        script = SCRIPT_NAME_MAP[script]
    if script == "dev" or (script not in sanscript.SCHEMES.keys()):
        logging.debug("Skipping over script - " + script)
        return
    result = sanscript.transliterate(dev_string, sanscript.DEVANAGARI, script)
    result_dev = sanscript.transliterate(result, script, sanscript.DEVANAGARI)
    assert expected_text == result or dev_string == result_dev, "Failed to convert to " + script + " from devanAgarI: got " + result + " instead of " + expected_text
def back_transliterate_word(self, word: str, predicted_lang_id=None):
    """Map a KOLKATA-romanized word back to devanAgarI, unless it looks like English.

    Non-English words (per the English dictionary and language-id) are always
    back-transliterated; possibly-English words are back-transliterated only
    when the result is a known Hindi word. Output is NFKC-normalized uppercase.
    """
    lowered = word.lower()
    if not self.en_dict.check(lowered) and predicted_lang_id != 1:
        normalized = unicodedata.normalize('NFKC', lowered)
        result = transliterate(normalized, sanscript.KOLKATA, sanscript.DEVANAGARI)
    else:
        candidate = unicodedata.normalize('NFKC', lowered)
        candidate = transliterate(candidate, sanscript.KOLKATA, sanscript.DEVANAGARI)
        if unicodedata.normalize('NFKC', candidate) in self.hindi_words:
            result = candidate
        else:
            result = lowered
    return unicodedata.normalize('NFKC', result).upper()
def handle_data(self, data):
    """Accumulate parsed text into self.mark_down, transliterating dictionary
    keys and Sanskrit words when source and target languages differ."""
    if self.current_tag in ['l', 'pc']:
        # Line/page-column markers: as of now not using this info.
        return
    final_data = data
    if self.current_tag in ['key1', 'key2'] and self.key_fromLang != self.key_toLang:
        final_data = transliterate(data, self.key_fromLang, self.key_toLang)
    if self.current_tag == 's' and self.fromLang != self.toLang:
        # Sanskrit word.
        final_data = transliterate(data, self.fromLang, self.toLang)
    self.mark_down = self.mark_down + final_data
def get_headers(self):
    """Return headers for this dhAtu, followed by the headers of all its shloka-s."""
    headers = [
        self.dhaatu_naama,
        NIGHANTU_NAAMA + ' _' + transliterate(self.index, ITRANS, DEVANAGARI),
        transliterate(self.dhaatu_naama, DEVANAGARI, ITRANS),
        transliterate(NIGHANTU_NAAMA, DEVANAGARI, ITRANS) + ' _' + self.index,
        'abdm _' + self.index,
    ]
    for shloka in self.shlokas:
        extra_headers = shloka.get_headers()
        if extra_headers is not None:
            headers.extend(extra_headers)
    return headers
def unused_convert_sanskrit(text, inTran, outTran):
    """Return transliterated adjusted text.

    <s>…</s> spans are transliterated from SLP1 to outTran and wrapped in
    <span class="s">; various structural tags are rewritten to HTML.
    Note: inTran is accepted for interface compatibility but unused — the
    source transliteration is hard-coded as 'slp1'.

    Fix: the div-rewrite replacement now uses a raw string — '\\g<1>' in a
    plain literal is an invalid escape (SyntaxWarning on modern Python).
    """
    text1 = ''
    counter = 0
    # Remove '<srs/>'
    text = text.replace('<srs/>', '')
    # Change the s tag to span; odd-indexed pieces of the split are tag contents.
    for i in re.split('<s>([^<]*)</s>', text):
        if counter % 2 == 0:
            text1 += i
        else:
            text1 += '<span class="s">' + sanscript.transliterate(
                i, 'slp1', outTran) + '</span>'
        counter += 1
    # PE nesting of LB tag
    text1 = text1.replace('<div n="1"/>', 'emsp;<div n="1"></div>')
    text1 = text1.replace('<div n="2"/>', 'emsp;emsp;<div n="2"></div>')
    text1 = text1.replace('<div n="3"/>', 'emsp;emsp;emsp;<div n="3"></div>')
    text1 = text1.replace('<div n="4"/>', 'emsp;emsp;emsp;emsp;<div n="4"></div>')
    text1 = text1.replace('<div n="5"/>', 'emsp;emsp;emsp;emsp;emsp;<div n="5"></div>')
    text1 = re.sub('<div n="([^"]*)"/>', r'<div n="\g<1>"></div>', text1)
    text1 = text1.replace('<lb/>', '<br />')
    # AP90 compounds and meanings break
    text1 = text1.replace('<b>--', '<br /><b>--')
    text1 = text1.replace('<span class="s">--', '<br /><span class="s">--')
    # — breaks
    text1 = text1.replace('— ', '<br />— ')
    return text1
def dump_deep_text(url_text_id, url_leaf_id_padding, dir_path, unit_info_file, get_collapsible_content=False, dry_run=False):
    """Walk the unit tree described by unit_info_file and dump each leaf unit
    from sa.wikisource into dir_path/<unit-dirs>/<leaf>.md.

    url_text_id: wikisource page name of the text (will be URL-quoted).
    url_leaf_id_padding: zero-padding applied to the leaf id in the wiki URL.
    dry_run: if True, only log what would be fetched.
    """
    unit_data = text_data.get_subunit_data(unit_info_file, [])
    for subunit_path in text_data.get_subunit_path_list(
            json_file=unit_info_file, unit_path_list=[]):
        # Intermediate levels become zero-padded directories; the leaf becomes the file name.
        relative_dir_path = "/".join(["%02d" % x for x in subunit_path[:-1]])
        outfile_path = os.path.join(dir_path, relative_dir_path, "%03d.md" % subunit_path[-1])
        import urllib
        item_url = "https://sa.wikisource.org/wiki/%s/%s" % (
            urllib.parse.quote(url_text_id),
            get_wiki_path(subunit_path=subunit_path, unit_data=unit_data, url_id_padding=url_leaf_id_padding))
        # Title is the leaf number rendered in devanAgarI digits.
        title = sanscript.transliterate("%03d" % subunit_path[-1], sanscript.SLP1, sanscript.DEVANAGARI)
        logging.info("Getting %s to %s with title %s", item_url, outfile_path, title)
        if not dry_run:
            dump_item(title=title, outfile_path=outfile_path, item_url=item_url, get_collapsible_content=get_collapsible_content)
def devanagari(self, strict_io=True):
    """ Return devanagari transcoding of self """
    slp1_text = self.thing
    if not strict_io:
        slp1_text = normalization.denormalize(slp1_text)
    return sanscript.transliterate(slp1_text, SLP1, DEVANAGARI)
def create_slp(code):
    """Create SLP file for a given dictionary code.

    code is to be selected from dictcode.json.
    Reads ../<fullName>/orig/<bookName>.txt (devanAgarI), converts it to SLP1
    and writes it to ../<fullName>/slp/<bookName>.txt.

    Improvements: `with` blocks guarantee the file handles are closed even on
    error (was manual open/close), and makedirs(exist_ok=True) avoids the
    check-then-create race of the old exists()/mkdir() pair.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    bookName, author = fullName.split('_')
    # Read the .txt file
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()
    # Convert the data to SLP1.
    data = sanscript.transliterate(data, 'devanagari', 'slp1')
    # Output directory; create it if it does not exist.
    directory = os.path.join('..', fullName, 'slp')
    os.makedirs(directory, exist_ok=True)
    fileout = os.path.join(directory, bookName + '.txt')
    # Create output file and save the SLP data in it.
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
def hyperlink(bl_file_path, all_words: list):
    """Hyperlink occurrences of dictionary headwords inside the file at bl_file_path.

    Uses GNU sed in-place editing (hence the linux check). Words longer than 3
    characters (and containing no double quote) are wrapped, longest first, in
    <a href="..."> anchors whose target is the transliterated headword.
    """
    if sys.platform != 'linux':
        print(
            'hyperlinking currently requires sed, which may exist only on unix compilant platforms'
        )
        sys.exit(1)
    print('\nhyperlinking.... may take a minute')
    considered_words = filter(lambda w: len(w) > 3 and '"' not in w, all_words)
    # Longest first, so links for longer words are not broken by links already
    # inserted for their substrings.
    sorted_words = sorted(considered_words, key=lambda w: len(w), reverse=True)
    # sed scripts of unbounded length are fragile, so substitute in batches.
    batch_size = 100
    no_batches = math.ceil(len(sorted_words) / batch_size)
    for bno in range(no_batches):
        words = sorted_words[batch_size * bno:batch_size * (bno + 1)]
        # 7~3s#...#...#ig: GNU sed "every 3rd line starting at line 7", with '#'
        # as the s/// delimiter and case-insensitive global replacement.
        # NOTE(review): the word text is interpolated into a shell command; this is
        # safe only because '"' was filtered above — confirm words cannot contain
        # '#', "'" or other shell/sed metacharacters.
        sed_command = 'sed -i \'' + '; '.join([
            r'7~3s#\([,;\. -]\)\({w}\)\([,;-\. -]\)#\1<a href="{s}">{s}</a>\3#ig'
            .format(w=w, s=sanscript.transliterate(
                w.lower(), scheme_map=HWS_XLITERATE_SCHEME_MAPS[0]))
            for w in words
        ]) + '\' "{}"'.format(bl_file_path)
        # print(sed_command)
        os.system(sed_command)
def test_to_devanagari(test_case):
    """Each test case's text must transliterate to exactly its dev_string."""
    logging.debug(str(test_case))
    expected = test_case["dev_string"]
    source_script = test_case["script"]
    actual = sanscript.transliterate(test_case["text"], source_script, sanscript.DEVANAGARI)
    failure_message = "Failed to convert " + source_script + " to devanAgarI: got " + actual + " instead of " + expected
    assert actual == expected, failure_message
def adjustlines(lines):
    """For each 'slp1:rest' input line, emit a block of diagnostic lines showing
    the original line, its parts, and IAST renderings.

    lines: iterable of strings, each expected to contain at least one ':'
        separating the SLP1 form from the rest (a missing ':' raises
        IndexError, as before).
    Returns the new list of output lines.

    Cleanup: the original recomputed line.split(":") into an unused variable
    (x2) and used several single-use temporaries (x1/x3, newline1..4); those
    are collapsed into clearly named locals with identical behavior.
    """
    newlines = []
    for line in lines:
        parts = line.split(":")
        slp1_form = parts[0]
        rest = parts[1]
        # First whitespace-separated token of the rest, with markup characters stripped.
        first_token = rest.split()[0]
        cleaned_iast = re.sub(r"([-~*‘’])|(\[a\])", "", first_token)
        iastrev = remove_accent(cleaned_iast)
        slp_to_iast = sanscript.transliterate(slp1_form, 'slp1', 'iast')
        newlines.append('%s' % "-" * 15)
        newlines.append('orig = %s' % line)
        newlines.append('slp1 = %s' % slp1_form)
        newlines.append('rest = %s' % rest)
        newlines.append('iast = %s' % cleaned_iast)
        newlines.append('iastrev = %s' % iastrev)
        newlines.append('slp-iast = %s' % slp_to_iast)
    return newlines
def test_syllables(test_case):
    """The metre identifier must report exactly the expected metres for the verse."""
    logging.debug(str(test_case))
    verse_lines = test_case["verse"].split("\n")
    pattern_lines = identify.to_pattern_lines(verse_lines)
    id_result = identify.identifier.IdentifyFromPatternLines(pattern_lines)
    assert 'exact' in id_result, id_result
    exact_matches = []
    for metre in id_result['exact'].keys():
        exact_matches.append(
            sanscript.transliterate(metre.lower(), _from=sanscript.IAST, _to=sanscript.DEVANAGARI))
    assert exact_matches == test_case["exactMatches"], id_result