Example #1
0
def create_slp(code):
    """Create SLP file for a given dictionary code.

    Reads ``../<fullName>/orig/<bookName>.txt`` as UTF-8, transliterates the
    whole text from Devanagari to SLP1 and writes the result to
    ``../<fullName>/slp/<bookName>.txt``. The ``slp`` directory is created
    when it does not exist yet.

    code is to be selected from dictcode.json.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani (the author part is not used here)
    bookName, _ = fullName.split('_')
    # Read the .txt file. The context manager closes the handle even when
    # reading or decoding raises.
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()
    # Convert the data to SLP1.
    data = sanscript.transliterate(data, 'devanagari', 'slp1')
    # Output directory
    directory = os.path.join('..', fullName, 'slp')
    # Create if the directory does not exist.
    if not os.path.exists(directory):
        os.mkdir(directory)
    fileout = os.path.join(directory, bookName + '.txt')
    # Create output file and save the SLP data in it.
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
Example #2
0
def apply_annotation(code):
    """Run autoannotation on the dictionary identified by the given code.

    The source text ``../<dictName>/orig/<book>.txt`` is handed to
    ``annotate_kosha`` together with the target path
    ``../<dictName>/annotated/<code>0.txt``. The ``annotated`` directory is
    created first when it is missing.
    """
    # e.g. ENSK -> ekaksharanamamala_sadhukalashagani
    dictName = utils.code_to_dict(code)
    # Split into book and author; only the book part names the files.
    book, _ = dictName.split('_')
    # Input: the original .txt file of the dictionary.
    source = os.path.join('..', dictName, 'orig', book + '.txt')
    # Output location; make sure its directory exists.
    outDir = os.path.join('..', dictName, 'annotated')
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    # Annotate the source and store the result in the target file.
    annotate_kosha(source, os.path.join(outDir, code + '0.txt'))
Example #3
0
def search_in_dict(query, code):
    """Return the verses of a dictionary that contain the given query.

    The SLP file ``../<fullName>/slp/<bookName>.txt`` is scanned line by line:

    * lines starting with ``;`` are tag lines; the tags ``p``, ``k``, ``v``
      and ``vv`` update the page number, kanda, varga and subvarga of the
      running ``utils.VerseInfo``;
    * lines starting with ``$`` or ``#`` are ignored;
    * every other line is appended to the current verse, and a line
      containing ``..`` ends that verse.

    A verse is reported when ``query`` occurs as a substring in at least one
    of its lines.

    Returns a list of dicts with the keys ``verse``, ``page``, ``versenum``,
    ``kanda``, ``varga`` and ``adhyaya``, in file order.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani (the author part is not used here)
    bookName, _ = fullName.split('_')
    # Read the .txt file
    filein = os.path.join('..', fullName, 'slp', bookName + '.txt')
    result = []
    verseDetails = utils.VerseInfo()
    verse = ''
    writeVerse = False
    # Open inside a context manager so the handle is closed once the scan
    # finishes or fails part-way.
    with codecs.open(filein, 'r', 'utf-8') as fin:
        for lin in fin:
            if lin.startswith(';'):
                (tag, value) = utils.extract_tag(lin)
                if tag == 'p':
                    verseDetails.update_pageNum(value)
                elif tag == 'k':
                    verseDetails.update_kanda(value)
                elif tag == 'v':
                    verseDetails.update_varga(value)
                elif tag == 'vv':
                    verseDetails.update_subvarga(value)
            elif lin.startswith(('$', '#')):
                # Not part of any verse; skip.
                continue
            else:
                verse += lin
                if query in lin:
                    writeVerse = True
                # '..' marks the last line of a verse.
                if '..' in lin:
                    verseDetails.update_verseNum(verse)
                    if writeVerse:
                        result.append({
                            'verse': verse,
                            'page': verseDetails.give_page_details(),
                            'versenum': verseDetails.verseNum,
                            'kanda': verseDetails.kanda,
                            'varga': verseDetails.varga,
                            'adhyaya': verseDetails.subvarga
                        })
                    # Start collecting the next verse from scratch.
                    writeVerse = False
                    verse = ''
    return result
Example #4
0
def replace_colon(code):
    """Replace colons in a dictionary's source text with the visarga sign.

    ``../<fullName>/orig/<bookName>.txt`` is read as UTF-8, every ``:`` that
    directly follows a character other than ``p`` or ``s`` is replaced by
    U+0903 (DEVANAGARI SIGN VISARGA), and the file is rewritten in place.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani (the author part is not used here)
    bookName, _ = fullName.split('_')
    # Read the .txt file
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()
    # NOTE(review): the replacement literal previously read as the three
    # Cyrillic characters that result from decoding the UTF-8 bytes of U+0903
    # as CP866. It is spelled as an escape here so that it no longer depends
    # on the encoding of this source file - confirm the visarga is intended.
    # Colons after 'p' or 's' are left alone - presumably they belong to
    # markup rather than to the text; verify against the data.
    data = re.sub(u'([^ps]):', u'\\g<1>\u0903', data)
    # The output path is the input path: the file is overwritten in place,
    # so its directory is known to exist already.
    fileout = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
Example #5
0
def create_slp(code):
    """Write the SLP1 version of the dictionary text for the given code.

    The UTF-8 file ``../<fullName>/orig/<bookName>.txt`` is transliterated
    from Devanagari to SLP1 and stored as ``../<fullName>/slp/<bookName>.txt``.
    The ``slp`` directory is created when it does not exist yet.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani (the author part is not used here)
    bookName, _ = fullName.split('_')
    # Read the .txt file; 'with' guarantees the handle is closed on errors.
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    with codecs.open(filein, 'r', 'utf-8') as fin:
        data = fin.read()
    data = sanscript.transliterate(data, 'devanagari', 'slp1')
    # Make sure the output directory exists before writing into it.
    directory = os.path.join('..', fullName, 'slp')
    if not os.path.exists(directory):
        os.mkdir(directory)
    fileout = os.path.join(directory, bookName + '.txt')
    with codecs.open(fileout, 'w', 'utf-8') as fout:
        fout.write(data)
def apply_changes(code):
    """Apply idempotent changes to file corresponding to the given code.

    Each line of ``../<fullName>/orig/<bookName>.txt`` is passed through
    ``idempotent_corrections`` and the results are written, in order, to
    ``../<fullName>/orig/<bookName>1.txt``.
    """
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani (the author part is not used here)
    bookName, _ = fullName.split('_')
    # Input and output live in the same directory, so once the input has
    # been opened the directory is known to exist.
    directory = os.path.join('..', fullName, 'orig')
    filein = os.path.join(directory, bookName + '.txt')
    # Output file.
    fileout = os.path.join(directory, bookName + '1.txt')
    # Context managers close both handles even when a correction or a write
    # raises part-way through the file.
    with codecs.open(filein, 'r', 'utf-8') as fin:
        with codecs.open(fileout, 'w', 'utf-8') as fout:
            # Apply idempotent corrections to each line.
            for line in fin:
                fout.write(idempotent_corrections(line))
Example #7
0
    dom = ET.parse(xmlfile)
    xslt = ET.parse(xsltfile)
    transform = ET.XSLT(xslt)
    newdom = transform(dom)
    result = ET.tostring(newdom, pretty_print=True)
    fout = codecs.open(htmlfile, 'wb')
    fout.write(result)
    fout.close()
    print('HTML generated. Success!')


if __name__ == "__main__":
    # Read the unique code of dictionary from arguments. ENSK
    code = sys.argv[1]
    # ENSK -> ekaksharanamamala_sadhukalashagani
    fullName = utils.code_to_dict(code)
    # ekaksharanamamala, sadhukalashagani
    bookName, author = fullName.split('_')
    # Read the .txt file
    filein = os.path.join('..', fullName, 'orig', bookName + '.txt')
    fin = codecs.open(filein, 'r', 'utf-8')
    data = fin.read()
    fin.close()
    # Get filename of babylon to store the output
    babylonfile = os.path.join('..', fullName, 'babylon',
                               bookName + '.babylon')
    # Get directory to store MD.
    mdDirectory = os.path.join('..', fullName, 'md')
    # Get directory to store JSON.
    jsonfile = os.path.join('..', fullName, 'json', bookName + '.json')
    # Get filename of xml to store the output