Esempio n. 1
0
    def __init__( self ):
        """
        Initialise an empty converter object.

        Sets the fixed XML tag names and the validation rules, creates the
        (still empty) containers that get filled later, and loads the
        Bible books codes data.
        """
        # Filename base
        self.filenameBase = "BibleBookOrders"

        # Tag names used when parsing the XML
        self.treeTag, self.headerTag, self.mainElementTag = "BibleBookOrderSystem", "header", "book"

        # Rules used for automatically checking/validating the XML (attributes first, then elements)
        self.compulsoryAttributes, self.optionalAttributes = ( "id", ), ()
        self.uniqueAttributes = self.compulsoryAttributes + self.optionalAttributes
        self.compulsoryElements, self.optionalElements = (), ()
        self.uniqueElements = self.compulsoryElements + self.optionalElements

        # Containers that we will fill later
        self.XMLSystems = {}
        self.__DataDicts = {} # Used for import
        self.__DataLists = {} # Used for import

        # The bible books codes data must be loaded and available (done last, as in the other set-up steps above)
        self.BibleBooksCodes = BibleBooksCodes().loadData()
Esempio n. 2
0
    def __init__( self, folder ):
        """
        Create the object by detecting the USFM filename template used in the given folder.

        The folder listing is scanned (skipping names ending in '~') for the first file
            whose base name is at least eight characters long, contains a digit and has an
            extension, and which contains both the digits and the book code of one of the
            triples from BibleBooksCodes.getAllParatextBooksCodeNumberTriples().
        Two layouts are recognised:
            digits at index 0 and book code at index 2 (pattern "ddbbb" + language code), or
            an eight-character name with digits at index 3 and book code at index 5 (pattern "nnnddbbb").

        On success these attributes are set:
            languageIndex, languageCode, digitsIndex, paratextBookCodeIndex, pattern, fileExtension.
        The pattern letters are uppercased if the found language code / book code are uppercase.

        @param folder: the folder to search
        @raise IOError: if the folder contains no files at all
        @raise ValueError: if a file contains a digits/book code pair in an unrecognised position,
            or if no file in the folder could be matched
        """
        # Get the data tables that we need for proper checking
        self.BibleBooksCodes = BibleBooksCodes().loadData()

        self.folder = folder
        files = os.listdir( self.folder )
        if not files: raise IOError( _("No files in given folder: ") + self.folder)

        # This must be bound before the loop: if every file is skipped (e.g., only backup files)
        #   the final check below would otherwise fail with an UnboundLocalError
        matched = False
        for foundFilename in files:
            if foundFilename.endswith('~'): continue # Ignore backup files
            foundFileBit, foundExtBit = os.path.splitext( foundFilename )
            foundLength = len( foundFileBit )
            containsDigits = any( char.isdigit() for char in foundFilename )
            if not ( foundLength>=8 and containsDigits and foundExtBit and foundExtBit[0]=='.' ):
                continue # This can't be one of the filenames that we're looking for

            for paratextBookCode,paratextDigits,bookReferenceCode in self.BibleBooksCodes.getAllParatextBooksCodeNumberTriples():
                upperBookCode = paratextBookCode.upper()
                if paratextDigits not in foundFileBit: continue
                if paratextBookCode not in foundFileBit and upperBookCode not in foundFileBit: continue

                digitsIndex = foundFileBit.index( paratextDigits )
                paratextBookCodeIndex = foundFileBit.index( paratextBookCode if paratextBookCode in foundFileBit else upperBookCode )
                foundBookCode = foundFileBit[paratextBookCodeIndex:paratextBookCodeIndex+3] # In the case actually used in the filename
                if digitsIndex==0 and paratextBookCodeIndex==2:
                    self.languageIndex = 5
                    self.pattern = "ddbbb" + 'n'*(foundLength-5)
                elif foundLength==8 and digitsIndex==3 and paratextBookCodeIndex==5:
                    self.languageIndex = 0
                    self.pattern = "nnnddbbb"
                else: raise ValueError( _("Unrecognized USFM filename template at ")+foundFileBit )
                # The language code is whatever is left once the five digits/book code characters are removed
                self.languageCode = foundFileBit[self.languageIndex:self.languageIndex+foundLength-5]
                self.digitsIndex = digitsIndex
                self.paratextBookCodeIndex = paratextBookCodeIndex
                if self.languageCode.isupper(): self.pattern = self.pattern.replace( 'n', 'N' )
                if foundBookCode.isupper(): self.pattern = self.pattern.replace( 'bbb', 'BBB' )
                self.fileExtension = foundExtBit[1:] # Without the leading period
                matched = True
                break
            if matched: break # The first matching file determines the template
        if not matched:
            raise ValueError( _("Unable to recognize valid USFM files in ") + folder )
Esempio n. 3
0
class _BibleBookOrdersConverter:
    """
    A class to handle data for Bible book order systems.

    The XML book order files are loaded by loadSystems() (and optionally validated),
        pivoted into Python containers by importDataToPython(),
        and can then be exported as Python, JSON, or C tables.
    """

    def __init__( self ):
        """
        Constructor.

        Sets up the XML tag names and validation rules, creates the empty data containers,
            and loads the Bible books codes data.
        """
        self.filenameBase = "BibleBookOrders"

        # These fields are used for parsing the XML
        self.treeTag = "BibleBookOrderSystem"
        self.headerTag = "header"
        self.mainElementTag = "book"

        # These fields are used for automatically checking/validating the XML
        self.compulsoryAttributes = ( "id", )
        self.optionalAttributes = ()
        self.uniqueAttributes = self.compulsoryAttributes + self.optionalAttributes
        self.compulsoryElements = ()
        self.optionalElements = ()
        self.uniqueElements = self.compulsoryElements + self.optionalElements


        # These are fields that we will fill later
        self.XMLSystems = {}
        self.__DataDicts, self.__DataLists = {}, {} # Used for import

        # Make sure we have the bible books codes data loaded and available
        self.BibleBooksCodes = BibleBooksCodes().loadData()
    # end of __init__

    def loadSystems( self, XMLFolder=None ):
        """
        Load and pre-process the specified book order systems.

        Every file in XMLFolder named BibleBookOrder_<code>.xml (case insensitive) is parsed.
            The header element is detached from the tree and saved separately
            (along with its version, date, and title if it has a single work element).
        If Globals.strictCheckingFlag is set, each loaded system is also validated.

        The loading is only ever done once -- later calls don't reload anything
            (and log an error if a different folder is requested).

        @param XMLFolder: the folder to load from (defaults to "DataFiles/BookOrders")
        @return: self (so that calls can be chained)
        """
        if not self.XMLSystems: # Only ever do this once
            if XMLFolder is None: XMLFolder = "DataFiles/BookOrders"
            self.__XMLFolder = XMLFolder
            if Globals.verbosityLevel > 2: print( _("Loading book order systems from {}...").format( self.__XMLFolder ) )
            filenamePrefix = "BIBLEBOOKORDER_"
            for filename in os.listdir( XMLFolder ):
                filepart, extension = os.path.splitext( filename )
                if extension.upper() != '.XML' or not filepart.upper().startswith(filenamePrefix):
                    continue # Not one of our files

                bookOrderSystemCode = filepart[len(filenamePrefix):]
                if Globals.verbosityLevel > 3: print( _("  Loading{} book order system from {}...").format( bookOrderSystemCode, filename ) )
                tree = ElementTree().parse( os.path.join( XMLFolder, filename ) )
                # Fail here if we didn't load anything at all
                #   (Explicit tests because the truth value of an Element is deprecated)
                assert tree is not None and len(tree)
                thisSystem = self.XMLSystems[bookOrderSystemCode] = { "tree": tree }

                # Check and remove the header element
                if tree.tag == self.treeTag:
                    header = tree[0]
                    if header.tag == self.headerTag:
                        thisSystem["header"] = header
                        tree.remove( header )
                        if len(header)>1:
                            logging.info( _("Unexpected elements in header") )
                        elif len(header)==0:
                            logging.info( _("Missing work element in header") )
                        else:
                            work = header[0]
                            if work.tag == "work":
                                thisSystem["version"] = work.find("version").text
                                thisSystem["date"] = work.find("date").text
                                thisSystem["title"] = work.find("title").text
                            else:
                                logging.warning( _("Missing work element in header") )
                    else:
                        logging.warning( _("Missing header element (looking for '{}' tag)").format( self.headerTag ) )
                else:
                    logging.error( _("Expected to load '{}' but got '{}'").format( self.treeTag, tree.tag ) )
                logging.info( _("    Loaded {} books").format( len(tree) ) )

                # Only validate files that we actually loaded (so this must be inside the loop body for matching files)
                if Globals.strictCheckingFlag:
                    self.__validateSystem( tree, bookOrderSystemCode )
        else: # The data must have been already loaded
            if XMLFolder is not None and XMLFolder!=self.__XMLFolder: logging.error( _("Bible book order systems are already loaded -- your different folder of '{}' was ignored").format( XMLFolder ) )
        return self
    # end of loadSystems

    def __validateSystem( self, bookOrderTree, systemName ):
        """
        Do a semi-automatic check of the XML file validity.

        Problems are reported through the logging module -- nothing is returned
            and the tree is not changed.

        @param bookOrderTree: the element containing the main (book) elements -- must not be empty
        @param systemName: the name of the system (only used in the messages)
        """
        assert bookOrderTree is not None and len(bookOrderTree)

        uniqueDict = {}
        for elementName in self.uniqueElements: uniqueDict["Element_"+elementName] = []
        for attributeName in self.uniqueAttributes: uniqueDict["Attribute_"+attributeName] = []
        seenTexts = set() # Kept separate from uniqueDict so the text can never clash with one of the above keys

        expectedID = 1
        for k,element in enumerate(bookOrderTree):
            if element.tag != self.mainElementTag:
                logging.warning( _("Unexpected element: {} in record {}").format( element.tag, k ) )
                continue

            # Check ascending ID field (a missing or non-numeric ID is reported rather than aborting the validation)
            ID = element.get("id")
            try: intID = int( ID )
            except (TypeError, ValueError):
                logging.error( _("Invalid or missing ID '{}' in record {} for {}").format( ID, k, systemName ) )
            else:
                if intID != expectedID:
                    logging.error( _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}").format( k, intID, expectedID, systemName ) )
            expectedID += 1

            # Check that this is unique
            if element.text:
                if element.text in seenTexts:
                    logging.error( _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {}) for {}").format( element.text, element.tag, ID, k, systemName ) )
                seenTexts.add( element.text )

            # Check compulsory attributes on this main element
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None:
                    logging.error( _("Compulsory '{}' attribute is missing from {} element in record {}").format( attributeName, element.tag, k ) )
                elif not attributeValue: # Don't also report a missing attribute as being blank
                    logging.warning( _("Compulsory '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, k ) )

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None:
                    if not attributeValue:
                        logging.warning( _("Optional '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, k ) )

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get( attributeName )
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning( _("Additional '{}' attribute ('{}') found on {} element in record {}").format( attributeName, attributeValue, element.tag, k ) )

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None:
                    if attributeValue in uniqueDict["Attribute_"+attributeName]:
                        logging.error( _("Found '{}' data repeated in '{}' field on {} element in record {}").format( attributeValue, attributeName, element.tag, k ) )
                    uniqueDict["Attribute_"+attributeName].append( attributeValue )

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                subelement = element.find( elementName )
                if subelement is None:
                    logging.error( _("Compulsory '{}' element is missing in record with ID '{}' (record {})").format( elementName, ID, k ) )
                elif not subelement.text: # Can only look at the text if the element was actually found
                    logging.warning( _("Compulsory '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, k ) )

            # Check optional elements
            for elementName in self.optionalElements:
                subelement = element.find( elementName )
                if subelement is not None:
                    if not subelement.text:
                        logging.warning( _("Optional '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, k ) )

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning( _("Additional '{}' element ('{}') found in record with ID '{}' (record {})").format( subelement.tag, subelement.text, ID, k ) )

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                subelement = element.find( elementName )
                if subelement is not None:
                    text = subelement.text
                    if text in uniqueDict["Element_"+elementName]:
                        logging.error( _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {})").format( text, elementName, ID, k ) )
                    uniqueDict["Element_"+elementName].append( text )
    # end of __validateSystem

    def __str__( self ):
        """
        This method returns the string representation of a Bible book order system.
        
        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        result = "_BibleBookOrdersConverter object"
        result += ('\n' if result else '') + "  Num book order systems loaded ={}".format( len(self.XMLSystems) )
        if 0: # Make it verbose
            for x in self.XMLSystems:
                result += ('\n' if result else '') + " {}".format( x )
                title = self.XMLSystems[x]["title"]
                if title: result += ('\n' if result else '') + "   {}".format( title )
                version = self.XMLSystems[x]["version"]
                if version: result += ('\n' if result else '') + "    Version:{}".format( version )
                date = self.XMLSystems[x]["date"]
                if date: result += ('\n' if result else '') + "    Last updated:{}".format( date )
                result += ('\n' if result else '') + "    Num books ={}".format( len(self.XMLSystems[x]["tree"]) )
        return result
    # end of __str__

    def __len__( self ):
        """ Returns the number of systems loaded. """
        return len( self.XMLSystems )
    # end of __len__

    def importDataToPython( self ):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.

        The work is only done once -- later calls return the same containers.

        @return: a 2-tuple containing
            a dictionary mapping each system code to a ( bookDataDict, idDataDict ) pair
                (book abbreviation -> ID number, and ID number -> book abbreviation), and
            a dictionary mapping each system code to the list of book abbreviations in file order
        """
        assert( self.XMLSystems )
        if self.__DataDicts and self.__DataLists: # We've already done an import/restructuring -- no need to repeat it
            return self.__DataDicts, self.__DataLists

        # We'll create a number of dictionaries
        for bookOrderSystemCode in self.XMLSystems:
            # Make the data dictionary for this book order system
            bookDataDict, idDataDict, BBBList = OrderedDict(), OrderedDict(), []
            for bookElement in self.XMLSystems[bookOrderSystemCode]["tree"]:
                bookRA = bookElement.text
                ID = bookElement.get( "id" )
                intID = int( ID )
                if not self.BibleBooksCodes.isValidReferenceAbbreviation( bookRA ):
                    logging.error( _("Unrecognized '{}' book abbreviation in '{}' book order system").format( bookRA, bookOrderSystemCode ) )
                # Save it by book reference abbreviation
                if bookRA in bookDataDict:
                    logging.error( _("Duplicate {} book reference abbreviations in '{}' book order system").format( bookRA, bookOrderSystemCode ) )
                bookDataDict[bookRA] = intID
                if intID in idDataDict:
                    logging.error( _("Duplicate {} ID (book index) numbers in '{}' book order system").format( intID, bookOrderSystemCode ) )
                idDataDict[intID] = bookRA
                BBBList.append( bookRA )

            if Globals.strictCheckingFlag: # check for duplicates
                for checkSystemCode in self.__DataLists:
                    if self.__DataLists[checkSystemCode] == BBBList:
                        logging.error( _("{} and {} book order systems are identical ({} books)").format( bookOrderSystemCode, checkSystemCode, len(BBBList) ) )

            # Now put it into my dictionaries for easy access
            self.__DataDicts[bookOrderSystemCode] = bookDataDict, idDataDict
            self.__DataLists[bookOrderSystemCode] = BBBList # Don't explicitly include the book index numbers, but otherwise the same information in a different form
        return self.__DataDicts, self.__DataLists
    # end of importDataToPython

    def exportDataToPython( self, filepath=None ):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        @param filepath: the file to write (defaults to DerivedFiles/<filenameBase>_Tables.py)
        """
        def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ):
            """Exports theDict to theFile."""
            theFile.write( '  "{}": {{\n    # Key is{}\n    # Fields are:{}\n'.format( dictName, keyComment, fieldsComment ) )
            for dictKey in theDict.keys():
                theFile.write( '   {}:{},\n'.format( repr(dictKey), repr(theDict[dictKey]) ) )
            theFile.write( "  }}, # end of{} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict

        from datetime import datetime

        assert( self.XMLSystems )
        self.importDataToPython()
        assert( self.__DataDicts and self.__DataLists )

        if not filepath: filepath = os.path.join( "DerivedFiles", self.filenameBase + "_Tables.py" )
        if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )

        # Split into two dictionaries
        #   (The encoding is given explicitly because the file header promises UTF-8)
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "#{}\n#\n".format( filepath ) )
            myFile.write( "# This UTF-8 file was automatically generated by BibleBookOrders.py V{} on {}\n#\n".format( versionString, datetime.now() ) )
            myFile.write( "#  {}{} loaded from the original XML files.\n#\n\n".format( len(self.XMLSystems), self.treeTag ) )
            myFile.write( "from collections import OrderedDict\n\n\n" )
            myFile.write( "bookDataDict = {\n  # Key is versificationSystemName\n  # Fields are omittedVersesSystem\n\n" )
            for systemName in self.__DataDicts:
                bookDataDict, idDataDict = self.__DataDicts[systemName]
                exportPythonDict( myFile, bookDataDict, systemName, "referenceAbbreviation", "id" )
            myFile.write( "}} # end of bookDataDict ({} systems)\n\n\n\n".format( len(self.__DataDicts) ) )
            myFile.write( "idDataDict = {\n  # Key is versificationSystemName\n  # Fields are omittedVersesSystem\n\n" )
            for systemName in self.__DataDicts:
                bookDataDict, idDataDict = self.__DataDicts[systemName]
                exportPythonDict( myFile, idDataDict, systemName, "id", "referenceAbbreviation" )
            myFile.write( "}} # end of idDataDict ({} systems)\n".format( len(self.__DataDicts) ) )
            myFile.write( "# end of{}".format( os.path.basename(filepath) ) )
    # end of exportDataToPython

    def exportDataToJSON( self, filepath=None ):
        """
        Writes the information tables to a .json file that can be easily loaded into a Java program.

        See http://en.wikipedia.org/wiki/JSON.

        @param filepath: the file to write (defaults to DerivedFiles/<filenameBase>_Tables.json)
        """
        import json

        assert( self.XMLSystems )
        self.importDataToPython()
        assert( self.__DataDicts and self.__DataLists )

        if not filepath: filepath = os.path.join( "DerivedFiles", self.filenameBase + "_Tables.json" )
        if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            # No header or trailer comments are written (not sure if comment fields are allowed in JSON)
            json.dump( self.__DataDicts, myFile, indent=2 )
    # end of exportDataToJSON

    def exportDataToC( self, filepath=None ):
        """
        Writes the information tables to a .h file that can be included in c and c++ programs.

        Two files are written: <filepath>.h with the struct typedefs
            and <filepath>.c with the actual data tables.

        @param filepath: the path WITHOUT any extension (defaults to DerivedFiles/<filenameBase>_Tables)
        """
        def writeStructure( hFile, structName, structure ):
            """ Writes a typedef to the .h file. """
            hFile.write( "typedef struct {}EntryStruct {{\n".format( structName ) ) # The space after struct is required for valid C
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration: hFile.write( "   {};\n".format( adjDeclaration ) )
            hFile.write( "}}{}Entry;\n\n".format( structName ) )
        # end of writeStructure

        def exportPythonDict( cFile, theDict, dictName, structName, sortedBy, structure ):
            """ Exports theDict to the .h and .c files. """
            def convertEntry( entry ):
                """ Convert special characters in an entry... """
                result = ""
                if isinstance( entry, int ): result += str(entry)
                elif isinstance( entry, str): result += '"' + str(entry).replace('"','\\"') + '"'
                else:
                    for field in entry:
                        if result: result += ", " # Separate the fields
                        if field is None: result += '""'
                        elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                        elif isinstance( field, int): result += str(field)
                        else: logging.error( _("Cannot convert unknown field type '{}' in entry '{}'").format( field, entry ) )
                return result
            # end of convertEntry

            fieldsCount = 2 # Each entry is just the key plus one value

            # The space after static is required for valid C
            cFile.write( "const static {}\n{}[{}] = {{\n  // Fields ({}) are{}\n  // Sorted by{}\n".format( structName, dictName, len(theDict), fieldsCount, structure, sortedBy ) )
            for dictKey in sorted(theDict.keys()):
                if isinstance( dictKey, str ):
                    cFile.write( "  {{\"{}\",{}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                elif isinstance( dictKey, int ):
                    cFile.write( "  {{{},{}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                else:
                    logging.error( _("Can't handle this type of data yet: {}").format( dictKey ) )
            cFile.write( "}}; //{} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict

        from datetime import datetime

        assert( self.XMLSystems )
        self.importDataToPython()
        assert( self.__DataDicts and self.__DataLists )

        if not filepath: filepath = os.path.join( "DerivedFiles", self.filenameBase + "_Tables" )
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( cFilepath ) ) # Don't bother telling them about the .h file
        ifdefName = self.filenameBase.upper() + "_Tables_h"

        # The encoding is given explicitly because the file headers promise UTF-8
        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            myHFile.write( "//{}\n//\n".format( hFilepath ) )
            myCFile.write( "//{}\n//\n".format( cFilepath ) )
            lines = "// This UTF-8 file was automatically generated by BibleBookOrders.py V{} on {}\n//\n".format( versionString, datetime.now() )
            myHFile.write( lines ); myCFile.write( lines )
            myCFile.write( "//  {}{} loaded from the original XML file.\n//\n\n".format( len(self.XMLSystems), self.treeTag ) )
            myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) ) # The directives need a space before the name
            myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) )

            CHAR = "const unsigned char"
            BYTE = "const int"
            N1 = "bookOrderByRef"
            N2 = "bookOrderByIndex"
            S1 = "{} referenceAbbreviation[3+1];{} indexNumber;".format(CHAR,BYTE)
            S2 = "{} indexNumber;{} referenceAbbreviation[3+1];".format(BYTE,CHAR)
            writeStructure( myHFile, N1, S1 )
            writeStructure( myHFile, N2, S2 )
            writeStructure( myHFile, "table", "{}* systemName;{}Entry* byReference;{}Entry* byBook;".format(CHAR,N1,N2) ) # I'm not sure if I need one or two asterisks on those last two
                                                                                                        # They're supposed to be pointers to an array of structures
            myHFile.write( "#endif //{}\n\n".format( ifdefName ) )
            myHFile.write( "// end of{}".format( os.path.basename(hFilepath) ) )

            for systemName in self.__DataDicts: # Now write out the actual data into the .c file
                bookDataDict, idDataDict = self.__DataDicts[systemName]
                myCFile.write( "\n//{}\n".format( systemName ) )
                exportPythonDict( myCFile, bookDataDict, systemName+"BookDataDict", N1+"Entry", "referenceAbbreviation", S1 )
                exportPythonDict( myCFile, idDataDict, systemName+"IndexNumberDataDict", N2+"Entry", "indexNumber", S2 )

            # Write out the final table of pointers to the above information
            myCFile.write( "\n// Pointers to above data\nconst static tableEntry bookOrderSystemTable[{}] = {{\n".format( len(self.__DataDicts) ) )
            for systemName in self.__DataDicts: # Now write out the actual pointer data into the .c file
                myCFile.write( '  {{ "{}",{},{} }},\n'.format( systemName, systemName+"BookDataDict", systemName+"IndexNumberDataDict" ) )
            myCFile.write( "}}; //{} entries\n\n".format( len(self.__DataDicts) ) )
            myCFile.write( "// end of{}".format( os.path.basename(cFilepath) ) )
    # end of exportDataToC

    def obsoleteCheckBookOrderSystem( self, systemName, bookOrderSchemeToCheck ):
        """
        Check the given book order scheme against all the loaded systems.
        Create a new book order file if it doesn't match any.

        The results are printed. The data must already have been imported (see importDataToPython).

        @param systemName: the name of the scheme being checked (used in messages and in the output filename)
        @param bookOrderSchemeToCheck: the list of book abbreviations in order
        """
        assert( systemName )
        assert( bookOrderSchemeToCheck )
        bookOrderLists = self.__DataLists # The lists of book abbreviations made by importDataToPython
        assert( bookOrderLists )

        matchedBookOrderSystemCodes = []
        systemMatchCount, systemMismatchCount, allErrors, errorSummary = 0, 0, '', ''
        for bookOrderSystemCode in bookOrderLists: # Step through the various reference schemes
            thisList = bookOrderLists[bookOrderSystemCode]
            if thisList == bookOrderSchemeToCheck:
                systemMatchCount += 1
                matchedBookOrderSystemCodes.append( bookOrderSystemCode )
            else:
                if len(thisList) == len(bookOrderSchemeToCheck):
                    # Same length but not equal, so this loop always breaks at the first differing pair
                    for BBB1,BBB2 in zip(thisList,bookOrderSchemeToCheck):
                        if BBB1 != BBB2: break
                    thisError = "    Doesn't match '{}' system (Both have {} books, but {} instead of {})".format( bookOrderSystemCode, len(bookOrderSchemeToCheck), BBB1, BBB2 )
                else:
                    thisError = "    Doesn't match '{}' system ({} books instead of {})".format( bookOrderSystemCode, len(bookOrderSchemeToCheck), len(thisList) )
                allErrors += ("\n" if allErrors else "") + thisError # So that the debug output below isn't always empty
                errorSummary += ("\n" if errorSummary else "") + thisError
                systemMismatchCount += 1

        if systemMatchCount:
            if systemMatchCount == 1: # What we hope for
                print( _("  {} matched {} book order (with these {} books)").format( systemName, matchedBookOrderSystemCodes[0], len(bookOrderSchemeToCheck) ) )
                if Globals.commandLineOptions.debug: print( errorSummary )
            else:
                print( _("  {} matched {} book order system(s): {} (with these {} books)").format( systemName, systemMatchCount, matchedBookOrderSystemCodes, len(bookOrderSchemeToCheck) ) )
                if Globals.commandLineOptions.debug: print( errorSummary )
        else:
            print( _("  {} mismatched {} book order systems (with these {} books)").format( systemName, systemMismatchCount, len(bookOrderSchemeToCheck) ) )
            print( allErrors if Globals.commandLineOptions.debug else errorSummary )

        if Globals.commandLineOptions.export and not systemMatchCount: # Write a new file
            outputFilepath = os.path.join( "ScrapedFiles", "BibleBookOrder_"+systemName + ".xml" )
            print( _("Writing {} {} books to {}...").format( len(bookOrderSchemeToCheck), systemName, outputFilepath ) )
            # NOTE: Only the book elements and the closing tag are written
            #   -- the opening tag and header are not written here
            with open( outputFilepath, 'wt' ) as myFile:
                for n,BBB in enumerate(bookOrderSchemeToCheck):
                    myFile.write( '  <book id="{}">{}</book>\n'.format( n+1,BBB ) )
                myFile.write( "</BibleBookOrderSystem>" )
    # end of obsoleteCheckBookOrderSystem
Esempio n. 4
0
    text = "The quick brown fox jumped over the lazy brown dog."
    adjustments = [(36, 'lazy', 'fat'), (0, 'The', 'A'),
                   (20, 'jumped', 'tripped'), (4, '', 'very '),
                   (10, 'brown', 'orange')]
    print("\n{}->{}".format(repr(text),
                            repr(applyStringAdjustments(text, adjustments))))

    print("\ncpu_count", os.cpu_count())


# end of Globals.demo

# Module-level set-up: apply the verbosity setting held in verbosityString
setVerbosity(verbosityString)
# Only when this module is NOT being run as the main script (i.e., it has been imported):
#   load the shared Bible data sets into module-level names.
if __name__ != '__main__':  # Load global Bible data sets
    from BibleBooksCodes import BibleBooksCodes
    # NOTE: This rebinds the imported class name to whatever loadData() returns,
    #   so after this line the class itself is no longer reachable by this name
    BibleBooksCodes = BibleBooksCodes().loadData()
    from USFMMarkers import USFMMarkers
    # Same rebinding as above, this time for the USFM markers
    USFMMarkers = USFMMarkers().loadData()
    # The newline markers list for 'CanonicalText' is kept as the paragraph markers
    USFMParagraphMarkers = USFMMarkers.getNewlineMarkersList('CanonicalText')
    #print( len(USFMParagraphMarkers), sorted(USFMParagraphMarkers) )
    #for marker in ( ):
    #print( marker )
    #USFMParagraphMarkers.remove( marker )
    # was 30 ['cls', 'li1', 'li2', 'li3', 'li4', 'm', 'mi', 'p', 'pc', 'ph1', 'ph2', 'ph3', 'ph4',
    #    'pi1', 'pi2', 'pi3', 'pi4', 'pm', 'pmc', 'pmo', 'pmr', 'pr', 'q1', 'q2', 'q3', 'q4',
    #    'qm1', 'qm2', 'qm3', 'qm4']
    # now 34 ['cls', 'li1', 'li2', 'li3', 'li4', 'm', 'mi', 'nb', 'p', 'pc', 'ph1', 'ph2', 'ph3', 'ph4',
    #    'pi1', 'pi2', 'pi3', 'pi4', 'pm', 'pmc', 'pmo', 'pmr', 'pr', 'q1', 'q2', 'q3', 'q4', 'qa', 'qc',
    #    'qm1', 'qm2', 'qm3', 'qm4', 'qr']
    #print( len(USFMParagraphMarkers), sorted(USFMParagraphMarkers) ); halt
Esempio n. 5
0
class USFMFilenames:
    """
    Class for creating and manipulating USFM Filenames.
    """

    def __init__( self, folder ):
        """
        Create the object by working out the USFM filename template used in a folder.

        The files in the folder are examined in the order given by os.listdir()
        (names ending in '~' are skipped). The first file whose name (without
        extension) is at least eight characters long and contains both a Paratext
        book number and the corresponding Paratext book code decides the template.
        Two layouts are recognised:
            ddbbbn...   digits at index 0, book code at index 2, language code from index 5
            nnnddbbb    language code at index 0, digits at index 3, book code at index 5
                            (the name must be exactly eight characters long)

        On success these attributes are set:
            languageIndex, languageCode, digitsIndex, paratextBookCodeIndex,
            pattern (with 'n'->'N' and/or 'bbb'->'BBB' if the language code and/or
                book code found in the filename is upper case),
            fileExtension (without the leading period).

        @param folder: the folder to be scanned for USFM files
        @raise IOError: if the folder contains no files at all
        @raise ValueError: if a file contains matching digits and book code but in
            an unrecognised position, or if no file in the folder matches at all
        """
        # Get the data tables that we need for proper checking
        self.BibleBooksCodes = BibleBooksCodes().loadData()

        self.folder = folder
        files = os.listdir( self.folder )
        if not files: raise IOError( _("No files in given folder: ") + self.folder)

        # This must be bound BEFORE the loop: if every file is skipped below (e.g., the
        #   folder only contains names ending in '~') the check after the loop would
        #   otherwise fail with an UnboundLocalError instead of the intended ValueError
        matched = False
        for foundFilename in files:
            if foundFilename.endswith('~'): continue # Skip these (presumably editor backup files)

            foundFileBit, foundExtBit = os.path.splitext( foundFilename )
            foundLength = len( foundFileBit )
            containsDigits = any( char.isdigit() for char in foundFilename )
            if not ( foundLength>=8 and containsDigits and foundExtBit and foundExtBit[0]=='.' ):
                continue # This name can't possibly fit either template

            for paratextBookCode,paratextDigits,bookReferenceCode in self.BibleBooksCodes.getAllParatextBooksCodeNumberTriples():
                upperBookCode = paratextBookCode.upper()
                if paratextDigits not in foundFileBit: continue
                if paratextBookCode not in foundFileBit and upperBookCode not in foundFileBit: continue

                digitsIndex = foundFileBit.index( paratextDigits )
                # Prefer the book code exactly as it is in the table, else try the upper case form
                paratextBookCodeIndex = foundFileBit.index(paratextBookCode) if paratextBookCode in foundFileBit else foundFileBit.index(upperBookCode)
                # Take the book code as it actually appears in the filename (so we can detect its case)
                foundBookCode = foundFileBit[paratextBookCodeIndex:paratextBookCodeIndex+3]

                if digitsIndex==0 and paratextBookCodeIndex==2:
                    self.languageIndex = 5
                    self.languageCode = foundFileBit[self.languageIndex:self.languageIndex+foundLength-5]
                    self.digitsIndex = digitsIndex
                    self.paratextBookCodeIndex = paratextBookCodeIndex
                    self.pattern = "ddbbb" + 'n'*(foundLength-5)
                elif foundLength==8 and digitsIndex==3 and paratextBookCodeIndex==5:
                    self.languageIndex = 0
                    self.languageCode = foundFileBit[self.languageIndex:self.languageIndex+foundLength-5]
                    self.digitsIndex = digitsIndex
                    self.paratextBookCodeIndex = paratextBookCodeIndex
                    self.pattern = "nnnddbbb"
                else: raise ValueError( _("Unrecognized USFM filename template at ")+foundFileBit )

                # Record the case of the language code and book code in the pattern
                if self.languageCode.isupper(): self.pattern = self.pattern.replace( 'n', 'N' )
                if foundBookCode.isupper(): self.pattern = self.pattern.replace( 'bbb', 'BBB' )
                self.fileExtension = foundExtBit[1:] # Drop the leading period
                matched = True
                break
            if matched: break # One matching file is enough to fix the template

        if not matched:
            raise ValueError( _("Unable to recognize valid USFM files in ") + folder )
    # end of __init__
        

    def __str__( self ):
        """
        This method returns the string representation of an object.
        
        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        result = ""
        if self.pattern: result += ('\n' if result else '') + self.pattern
        if self.fileExtension: result += ('\n' if result else '') + self.fileExtension
        return result
    # end of __str___


    def possibleFiles( self ):
        """Return a list of valid USFM filenames"""
        filelist = []
        for paratextBookCode,paratextDigits,bookReferenceCode in self.BibleBooksCodes.getAllParatextBooksCodeNumberTriples():
            filename = "--------" # Eight characters
            filename = filename[:self.digitsIndex] + paratextDigits + filename[self.digitsIndex+len(paratextDigits):]
            filename = filename[:self.paratextBookCodeIndex] + paratextBookCode.upper() if 'BBB' in self.pattern else paratextBookCode + filename[self.paratextBookCodeIndex+len(paratextBookCode):]
            filename = filename[:self.languageIndex] + self.languageCode + filename[self.languageIndex+len(self.languageCode):]
            filename += '.' + self.fileExtension
            #print( filename )
            filelist.append( (bookReferenceCode,filename,) )
        return filelist
    # end of possibleFiles


    def actualFiles( self ):
        """Return a list of tuples of UPPER CASE book codes with actual (present) USFM filenames"""
        filelist = []
        for bookReferenceCode,possibleFilename in self.possibleFiles():
            possibleFilepath = os.path.join( self.folder, possibleFilename )
            #print( '  Looking for: ' + possibleFilename )
            if os.access( possibleFilepath, os.R_OK ):
                #paratextBookCode = possibleFilename[self.paratextBookCodeIndex:self.paratextBookCodeIndex+3].upper()
                filelist.append( (bookReferenceCode, possibleFilename,) )
        return filelist