Ejemplo n.º 1
0
    def do_action(options):
        """Dispatch the requested action to the matching Sfm operation.

        Args:
            options: mapping with the keys 'input', 'output' and 'action'.
                The key 'compare' is only looked up when the action is
                "compare".

        Returns:
            Whatever ``Sfm.filter`` or ``Sfm.compare`` returns for the
            actions "simple" and "compare"; ``None`` for any other action.
        """

        finput = options['input']
        foutput = options['output']
        action = options['action']

        if action == "simple":
            return Sfm.filter(finput, foutput, "simple")
        if action == "compare":
            # Looked up here so that the other actions do not require a
            # 'compare' entry in the options
            return Sfm.compare(finput, options['compare'], foutput)

        # Unknown action: nothing to do
        return None
Ejemplo n.º 2
0
    def filter(finput, foutput, ftype):
        """Read an SFM file, filter every verse and write the result.

        Args:
            finput: location of the input, handed to ``SfmFile.read``.
            foutput: location of the result, handed to ``SfmFile.write``.
            ftype: "none" copies each verse unchanged; "simple" strips
                backslash markers, rewrites the guillemets as << and >>
                and removes the literal "*g**".

        Returns:
            The result of ``read`` when that is falsy, otherwise the result
            of ``SfmFile.write``. ``False`` when an exception occurs, which
            includes an unsupported ``ftype`` on a non-empty input.
        """

        oerr = util.ErrHandle()

        try:
            # A marker is a backslash plus word characters, followed by one
            # whitespace character, an asterisk or the end of the verse
            r_sfm = re.compile(r"\\\w+((\s)|(\*)|$)")

            # Read the input
            sfmfile = SfmFile(oerr)
            result = sfmfile.read(finput)

            if result:
                # Filter the input according to filter type
                lst_c = []
                for one_c in sfmfile.chapters:
                    verses = []
                    for one_v in one_c:
                        if ftype == "none":
                            new_v = one_v
                        elif ftype == "simple":
                            # Remove the markers
                            new_v = r_sfm.sub("", one_v)
                            # Replace quotation marks
                            new_v = new_v.replace("«", "<<")
                            new_v = new_v.replace("»", ">>")
                            # Remove *g**
                            new_v = new_v.replace("*g**", "")
                        else:
                            # Fail with a meaningful message instead of
                            # tripping over an unassigned variable
                            raise ValueError(
                                "unsupported filter type: {!r}".format(ftype))
                        verses.append(new_v)
                    lst_c.append(verses)

                # Write the new list as a file
                result = sfmfile.write(foutput, lst_c)
        except Exception:
            # The returned message is not needed here; the call is kept in
            # case the handler records it for DoError -- TODO confirm
            oerr.get_error_message()
            oerr.DoError("filter")
            result = False

        return result
Ejemplo n.º 3
0
"""
The classes in this models.py are used by the Python application hebot

"""

import os, sys
import copy
import json
import util

# Category lookup table (a single entry so far)
cat_dict = {'CP-Conj': 'ConjP'}

# Error handler shared at module level
errHandle = util.ErrHandle()

def get_error_message():
    """Describe the exception that is currently being handled.

    Returns:
        The exception text, followed by " at line <n>" when a traceback is
        available, or an empty string when no exception is being handled.
    """
    _, exc_value, exc_tb = sys.exc_info()
    if exc_value is None:
        # sys.exc_info() always yields three items; outside an ``except``
        # block they are all None, so there is nothing to report
        return ""

    sMsg = str(exc_value)
    if exc_tb is not None:
        sMsg += " at line " + str(exc_tb.tb_lineno)
    return sMsg


class HierObj(object):
    """Hierarchical object

    The attributes below are class-level defaults (empty strings).
    """

    pos = ""        # Grammatical category
    txt = ""        # The text associated with this instance
Ejemplo n.º 4
0
class convert(object):
    """Split one large text file into a separate file per text.

    A new text starts at a line beginning with "( ICLE-" or "(ICLE-" that
    also contains a closing bracket; the identifier between the brackets
    becomes the name of the output file.
    """

    errHandle = util.ErrHandle()
    folder = ""     # Output folder; set by split2texts()

    def __init__(self, **kwargs):
        # No real action here
        super().__init__(**kwargs)

    def split2texts(self, options):
        """Split one large text into several smaller ones.

        Args:
            options: mapping with 'input' (path of the file to split) and
                'output' (folder receiving one "<header>.txt" per text).

        Returns:
            True when the input was processed and every text was written,
            False when an exception occurred or a text could not be written.
        """

        try:
            sInput = options['input']
            self.folder = options['output']

            sHeader = ""    # Name of the text being collected
            lText = []      # Lines of the text being collected
            bOk = True      # Becomes False when a text cannot be written

            # The context manager closes the input even when a line fails
            with open(sInput, 'r', encoding='UTF8') as fInput:
                for line in fInput:
                    # Check if this identifies the start of a text
                    if line.startswith(("( ICLE-", "(ICLE-")):
                        iPos = line.find(")")
                        if iPos >= 0:
                            # Finish the previous text first
                            if not self.writetext(sHeader, lText):
                                bOk = False
                                # Do not let unsaved lines leak into the
                                # text that starts here
                                lText.clear()

                            # The header is everything between the brackets
                            # minus spaces, so both the padded and the
                            # unpadded form yield the full identifier
                            sHeader = line[1:iPos].replace(" ", "")
                            # NOTE(review): strip() also removes the line
                            # ending, so text following the header is joined
                            # to the next line in the output -- confirm
                            line = line[iPos + 1:].strip()
                    # Add the line to the text
                    lText.append(line)

            # After finishing: output the final text
            if not self.writetext(sHeader, lText):
                bOk = False

            return bOk
        except Exception:
            # The returned message is not used; the call is kept in case the
            # handler records it -- TODO confirm
            self.errHandle.get_error_message()
            return False

    def writetext(self, sHeader, lText):
        """Write the collected lines to "<folder>/<sHeader>.txt".

        Nothing is written when sHeader is empty. After a successful write
        lText is emptied in place so the caller can reuse it.

        Returns:
            True on success or when there was nothing to write, False when
            an exception occurred.
        """
        try:
            if sHeader != "":
                sFile = self.folder + "/" + sHeader + ".txt"
                # The context manager closes the file even if writing fails
                with open(sFile, 'w', encoding='UTF8') as fOutput:
                    fOutput.writelines(lText)
                lText.clear()
            return True
        except Exception:
            # The returned message is not used; the call is kept in case the
            # handler records it -- TODO confirm
            self.errHandle.get_error_message()
            return False
Ejemplo n.º 5
0
    def compare(finput, fcompare, foutput):
        """Compare two SFM files verse by verse and write the differences.

        Per verse only the first differing word is reported, one line each:
        "<chapter index>:<verse index> at word #<n> [<input>] versus
        [<compare>]", with all indices counted from 0. Words beyond the
        length of the shorter verse are not compared.

        Args:
            finput: location of the first file, handed to ``SfmFile.read``.
            fcompare: location of the file to compare against.
            foutput: path of the UTF-8 text file receiving the report.

        Returns:
            True when the report was written; False when either file could
            not be read or an exception occurred.
        """

        oerr = util.ErrHandle()

        # Compiled once instead of once per verse
        r_dashes = re.compile(r'\s\-+\s')
        r_space = re.compile(r'\s+')

        def pre_process(sInput):
            """Normalize a verse: drop dashes and punctuation, lower-case."""
            sOutput = r_dashes.sub(" ", sInput)
            sOutput = sOutput.replace('\u0406', '\u04c0')
            sOutput = sOutput.replace("„", "<")
            sOutput = sOutput.replace('“', ">")
            sOutput = sOutput.replace('–', "")
            sOutput = sOutput.replace(".", "")
            sOutput = sOutput.replace(",", "")
            return sOutput.lower()

        try:
            # Read both files
            sfminput = SfmFile(oerr)
            sfmcompare = SfmFile(oerr)

            # A failed read must not produce an empty "no differences"
            # report; filter() treats a falsy read() result the same way
            if not (sfminput.read(finput) and sfmcompare.read(fcompare)):
                return False

            oInput = sfminput.chapters
            oCompare = sfmcompare.chapters

            lst_diff = []

            for idx_c, ch_input in enumerate(oInput):
                # If the comparison file has fewer chapters or verses, this
                # lookup raises (IndexError for lists) and the comparison is
                # reported as failed by the handler below
                ch_compare = oCompare[idx_c]
                for idx_v, vs_input in enumerate(ch_input):
                    vs_compare = ch_compare[idx_v]

                    # Convert both normalized verses into word lists
                    w_input = r_space.split(pre_process(vs_input))
                    w_compare = r_space.split(pre_process(vs_compare))

                    # zip() stops at the shorter verse
                    word_pairs = zip(w_input, w_compare)
                    for idx, (wd_input, wd_compare) in enumerate(word_pairs):
                        if wd_input != wd_compare:
                            # Found the first difference
                            msg = "{}:{} at word #{} [{}] versus [{}]".format(
                                idx_c, idx_v, idx, wd_input, wd_compare)
                            lst_diff.append(msg)
                            break

            # Write the combined report
            with open(foutput, "w", encoding="utf-8") as f:
                f.write("\n".join(lst_diff))

            result = True
        except Exception:
            # The returned message is not needed here; the call is kept in
            # case the handler records it for DoError -- TODO confirm
            oerr.get_error_message()
            oerr.DoError("compare")
            result = False

        return result