def create_slp(code):
    """Create SLP file for a given dictionary code.

    Reads ``../<fullName>/orig/<bookName>.txt`` as UTF-8, transliterates the
    whole text from Devanagari to SLP1 and writes the result to
    ``../<fullName>/slp/<bookName>.txt``. The ``slp`` directory is created
    when it does not exist yet.

    code is to be selected from dictcode.json.

    NOTE(review): a second ``create_slp`` appears later in this file; if both
    really live in the same module, the later definition is the one in effect.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    # (unpacking fails with ValueError unless there are exactly two parts)
    bookName, _author = fullName.split('_')

    # Read the .txt file; `with` closes the handle even if reading fails.
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()

    # Convert the data to SLP1.
    data = sanscript.transliterate(data, 'devanagari', 'slp1')

    # Output directory. Create it if it does not exist.
    directory = os.path.join('..', fullName, 'slp')
    if not os.path.exists(directory):
        os.mkdir(directory)

    # Create output file and save the SLP data in it.
    fileout = os.path.join(directory, bookName + '.txt')
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
def apply_annotation(code):
    """Run the automatic annotation for the dictionary identified by `code`.

    The input is ``../<fullName>/orig/<bookName>.txt`` and the annotated
    output is ``../<fullName>/annotated/<code>0.txt``. The ``annotated``
    directory is created first when it is missing. The actual work is
    delegated to ``annotate_kosha``.
    """
    # e.g. ENSK -> ekaksharanamamala_sadhukalashagani
    dictName = utils.code_to_dict(code)
    # e.g. ekaksharanamamala, sadhukalashagani (only the book name is used)
    book, _writer = dictName.split('_')

    # Make sure the folder for annotated output is there.
    outDir = os.path.join('..', dictName, 'annotated')
    if not os.path.exists(outDir):
        os.mkdir(outDir)

    # Source text and destination of the annotated text.
    source = os.path.join('..', dictName, 'orig', book + '.txt')
    target = os.path.join(outDir, code + '0.txt')

    # Annotate the source and store the result in the target.
    annotate_kosha(source, target)
def search_in_dict(query, code):
    """Return the verses of a dictionary that contain `query`.

    The SLP file ``../<fullName>/slp/<bookName>.txt`` is scanned line by line:

    * lines starting with ``;`` are tag lines; tags ``p``, ``k``, ``v`` and
      ``vv`` update the page number, kanda, varga and subvarga of the running
      ``utils.VerseInfo`` respectively,
    * lines starting with ``$`` or ``#`` are ignored,
    * every other line is appended to the current verse; a line containing
      ``..`` closes the verse.

    `query` is matched as a plain substring of each verse line.

    Returns a list with one dict per matching verse, holding the keys
    ``verse``, ``page``, ``versenum``, ``kanda``, ``varga`` and ``adhyaya``
    (the latter is filled from the subvarga).
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    bookName, _author = fullName.split('_')
    # The search runs on the SLP version of the text.
    filein = os.path.join('..', fullName, 'slp', bookName + '.txt')

    result = []
    verseDetails = utils.VerseInfo()
    verse = ''
    writeVerse = False
    # `with` guarantees the file is closed; iterating over a bare
    # codecs.open(...) left the handle open.
    with codecs.open(filein, 'r', 'utf-8') as fin:
        for lin in fin:
            if lin.startswith(';'):
                # Tag line: update the running location information.
                (tag, value) = utils.extract_tag(lin)
                if tag == 'p':
                    verseDetails.update_pageNum(value)
                elif tag == 'k':
                    verseDetails.update_kanda(value)
                elif tag == 'v':
                    verseDetails.update_varga(value)
                elif tag == 'vv':
                    verseDetails.update_subvarga(value)
            elif lin.startswith(('$', '#')):
                # Lines marked with '$' or '#' take no part in the search.
                continue
            else:
                verse += lin
                if query in lin:
                    # Remember the hit until the verse is complete.
                    writeVerse = True
                if '..' in lin:
                    # End of verse: the verse number is refreshed for every
                    # verse, whether or not it matched.
                    verseDetails.update_verseNum(verse)
                    if writeVerse:
                        result.append({
                            'verse': verse,
                            'page': verseDetails.give_page_details(),
                            'versenum': verseDetails.verseNum,
                            'kanda': verseDetails.kanda,
                            'varga': verseDetails.varga,
                            'adhyaya': verseDetails.subvarga,
                        })
                    # Start afresh for the next verse.
                    writeVerse = False
                    verse = ''
    return result
def replace_colon(code):
    """Replace ASCII colons by the Devanagari visarga in the original text.

    ``../<fullName>/orig/<bookName>.txt`` is read as UTF-8 and rewritten in
    place. A colon is replaced only when it follows a character other than
    ``p`` or ``s`` (presumably to leave URL schemes such as ``http:`` and
    ``https:`` intact -- TODO confirm); a colon at the very start of the
    text has no preceding character and is therefore left alone as well.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    bookName, _author = fullName.split('_')

    # Input and output are the same file, so its directory necessarily
    # exists once the read below has succeeded.
    filename = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filename, 'r', 'utf-8') as fin:
        data = fin.read()

    # U+0903 is DEVANAGARI SIGN VISARGA. It is spelled as an escape so that
    # the literal cannot be damaged by a wrong source-file encoding (the
    # previous literal was the visarga's UTF-8 bytes mis-decoded into
    # Cyrillic letters). The template is not a raw string because the re
    # module does not interpret \u inside replacement templates.
    data = re.sub(r'([^ps]):', u'\\g<1>\u0903', data)

    with codecs.open(filename, 'w', 'utf-8') as fout:
        fout.write(data)
def create_slp(code):
    """Write the SLP1 transliteration of a dictionary's original text.

    Reads ``../<fullName>/orig/<bookName>.txt`` as UTF-8, transliterates it
    from Devanagari to SLP1 and stores the result as
    ``../<fullName>/slp/<bookName>.txt``, creating the ``slp`` directory when
    it is missing.

    NOTE(review): ``create_slp`` appears to be defined earlier in this file
    as well; if both are in the same module, this later definition wins.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    # (unpacking fails with ValueError unless there are exactly two parts)
    bookName, _author = fullName.split('_')

    # Read the .txt file; `with` closes the handle even if reading fails.
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()

    data = sanscript.transliterate(data, 'devanagari', 'slp1')

    # Create the output directory if it does not exist.
    directory = os.path.join('..', fullName, 'slp')
    if not os.path.exists(directory):
        os.mkdir(directory)

    fileout = os.path.join(directory, bookName + '.txt')
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
def apply_changes(code):
    """Apply idempotent changes to file corresponding to the given code.

    Every line of ``../<fullName>/orig/<bookName>.txt`` is passed through
    ``idempotent_corrections`` and the results are written, in order, to
    ``../<fullName>/orig/<bookName>1.txt``. Both files are UTF-8.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    bookName, _author = fullName.split('_')

    # Input and output live in the same directory, so it is known to exist
    # as soon as the input file has been opened.
    directory = os.path.join('..', fullName, 'orig')
    filein = os.path.join(directory, bookName + '.txt')
    fileout = os.path.join(directory, bookName + '1.txt')

    # `with` closes both handles even when a correction or a write fails.
    with codecs.open(filein, 'r', 'utf-8') as fin:
        with codecs.open(fileout, 'w', 'utf-8') as fout:
            # Apply idempotent corrections to each line.
            for line in fin:
                fout.write(idempotent_corrections(line))
dom = ET.parse(xmlfile) xslt = ET.parse(xsltfile) transform = ET.XSLT(xslt) newdom = transform(dom) result = ET.tostring(newdom, pretty_print=True) fout = codecs.open(htmlfile, 'wb') fout.write(result) fout.close() print('HTML generated. Success!') if __name__ == "__main__": # Read the unique code of dictionary from arguments. ENSK code = sys.argv[1] # ENSK -> ekaksharanamamala_sadhukalashagani fullName = utils.code_to_dict(code) # ekaksharanamamala, sadhukalashagani bookName, author = fullName.split('_') # Read the .txt file filein = os.path.join('..', fullName, 'orig', bookName + '.txt') fin = codecs.open(filein, 'r', 'utf-8') data = fin.read() fin.close() # Get filename of babylon to store the output babylonfile = os.path.join('..', fullName, 'babylon', bookName + '.babylon') # Get directory to store MD. mdDirectory = os.path.join('..', fullName, 'md') # Get directory to store JSON. jsonfile = os.path.join('..', fullName, 'json', bookName + '.json') # Get filename of xml to store the output