Ejemplo n.º 1
0
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
        """
        Create the internal ESFM Bible object.

        Checks the contents of sourceFolder, tries to load a single SSF settings file,
            chooses a name for the Bible, and builds the table of possible book filenames.
        No books are loaded here.

        Parameters:
            sourceFolder: path of the folder that is scanned for files
            givenName: optional name -- if None, the name is taken from the
                'FullName' or 'Name' setting, then the folder name, then "ESFM Bible"
            givenAbbreviation: saved as self.abbreviation

        If the folder contains no files at all, a message may be printed and
            initialisation stops early (the book lists are left empty).
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Give the book-list and dictionary attributes empty defaults up front
        #   so that they exist even if we have to give up early below
        self.maximumPossibleFilenameTuples, self.possibleFilenameDict = [], {}
        self.dontLoadBook = []
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        # Subfolders are only reported (not an error) -- some known ones are silently accepted
        # NOTE: Must compare with == here: "x in ('__MACOSX')" is a substring test on a string, not a tuple lookup
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( "ESFMBible.load: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
            print( self.USFMFilenamesObject )

        # Attempt to load the SSF file
        # (ssfFilepath starts as an empty dict, as before, and is replaced by the path only if exactly one SSF file is found)
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )

        # Work out the name: given name, then SSF settings, then folder name, then a fixed default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
Ejemplo n.º 2
0
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
        """
        Create the internal ESFM Bible object.

        Checks the contents of sourceFolder, tries to load a single SSF settings file,
            chooses a name for the Bible, and builds the table of possible book filenames.
        No books are loaded here.

        Parameters:
            sourceFolder: path of the folder that is scanned for files
            givenName: optional name -- if None, the name is taken from the
                'FullName' or 'Name' setting, then the folder name, then "ESFM Bible"
            givenAbbreviation: saved as self.abbreviation

        If the folder contains no files at all, a message may be printed and
            initialisation stops early (the book lists are left empty).
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Give the book-list and dictionary attributes empty defaults up front
        #   so that they exist even if we have to give up early below
        self.maximumPossibleFilenameTuples, self.possibleFilenameDict = [], {}
        self.dontLoadBook = []
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        # Subfolders are only reported (not an error) -- some known ones are silently accepted
        # NOTE: Must compare with == here: "x in ('__MACOSX')" is a substring test on a string, not a tuple lookup
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( self.USFMFilenamesObject )

        # Attempt to load the SSF file
        # (ssfFilepath starts as an empty dict, as before, and is replaced by the path only if exactly one SSF file is found)
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )

        # Work out the name: given name, then SSF settings, then folder name, then a fixed default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
Ejemplo n.º 3
0
    def preload( self ):
        """
        Check the source folder and get ready to load books (without loading any).

        Scans self.sourceFolder, loads the settings from a single SSF file (if exactly one is found)
            into self.suppliedMetadata['PTX7']['SSF'], and fills
            self.maximumPossibleFilenameTuples, self.possibleFilenameDict and self.availableBBBs.

        Sets self.preloadDone to True on completion.
        If the folder contains no files at all, returns early WITHOUT setting self.preloadDone.
        """
        if BibleOrgSysGlobals.debugFlag or debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
            print( t("preload() from {}").format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        # Subfolders are only reported (not an error) -- some known ones are silently accepted
        # NOTE: Must compare with == here: "x in ('__MACOSX')" is a substring test on a string, not a tuple lookup
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( self.USFMFilenamesObject )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        # Attempt to load the SSF file
        self.ssfFilepath = None
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            PTXSettingsDict = loadPTX7ProjectData( self, self.ssfFilepath )
            if PTXSettingsDict:
                if 'PTX7' not in self.suppliedMetadata: self.suppliedMetadata['PTX7'] = {}
                self.suppliedMetadata['PTX7']['SSF'] = PTXSettingsDict
                self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

        # NOTE: self.name is not set here (the old code that derived it from
        #   the given name, the settings, or the folder name is disabled)

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.availableBBBs.add( BBB ) # assumes self.availableBBBs is a set created elsewhere (not visible here)
            self.possibleFilenameDict[BBB] = filename

        self.preloadDone = True
Ejemplo n.º 4
0
class ESFMBible( Bible ):
    """
    Class to load and manipulate ESFM Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
        """
        Create the internal ESFM Bible object.

        Checks the contents of sourceFolder, tries to load a single SSF settings file,
            chooses a name for the Bible, and builds the table of possible book filenames.
        No books are loaded here -- see load() and loadBook().

        Parameters:
            sourceFolder: path of the folder that is scanned for files
            givenName: optional name -- if None, the name is taken from the
                'FullName' or 'Name' setting, then the folder name, then "ESFM Bible"
            givenAbbreviation: saved as self.abbreviation

        If the folder contains no files at all, a message may be printed and
            initialisation stops early (the book lists are left empty).
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Give the book-list and dictionary attributes empty defaults up front
        #   so that load() etc. still work even if we have to give up early below
        self.maximumPossibleFilenameTuples, self.possibleFilenameDict = [], {}
        self.dontLoadBook = []
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        # Subfolders are only reported (not an error) -- some known ones are silently accepted
        # NOTE: Must compare with == here: "x in ('__MACOSX')" is a substring test on a string, not a tuple lookup
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( self.USFMFilenamesObject )

        # Attempt to load the SSF file
        # (ssfFilepath starts as an empty dict, as before, and is replaced by the path only if exactly one SSF file is found)
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )

        # Work out the name: given name, then SSF settings, then folder name, then a fixed default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
    # end of ESFMBible.__init__


    def loadSSFData( self, ssfFilepath ):
        """
        Process the SSF data from the given filepath.

        The file is read line by line (as UTF-8) and treated as a very simple XML-like format:
            a <ScriptureText> line opens the settings and </ScriptureText> closes them.
        In between, each non-blank line is expected to be one of
            <Field>contents</Field>
            <Field attributes>contents</Field>
            <Field/> or <Field /> (a self-closing, empty field, possibly with attributes)
        Fields without attributes are stored as strings;
            fields with attributes are stored as (contents, attributes) 2-tuples.
        Any other line is reported with a printed error message.

        The settings are saved in self.ssfDict with a (shallow) copy in self.settingsDict.
        Nothing is returned.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading SSF data from {!r}").format( ssfFilepath ) )
        # status: 0 = before <ScriptureText>, 1 = inside it, 2 = after </ScriptureText>
        lineCount, status, settingsDict = 0, 0, {}
        with open( ssfFilepath, encoding='utf-8' ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( "ESFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}".format( ssfFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker
                # strip() also removes the trailing newline (and is safe if only a BOM was on the line)
                line = line.strip() # Remove leading and trailing whitespace
                if not line: continue # Just discard blank lines
                processed = False
                if status==0 and line=="<ScriptureText>":
                    status = 1
                    processed = True
                elif status==1 and line=="</ScriptureText>":
                    status = 2
                    processed = True
                elif status==1 and line[0]=='<' and line.endswith('/>'): # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[1:-2] # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else: # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split( None, 1 )
                        if BibleOrgSysGlobals.debugFlag: assert( len(bits)==2 )
                        fieldname = bits[0]
                        attributes = bits[1]
                        # A self-closing field has no contents, so store an empty string
                        #   (there is no 'contents' variable to use in this branch)
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status==1 and line[0]=='<' and line[-1]=='>':
                    ix1 = line.find('>') # End of the opening tag
                    ix2 = line.find('</') # Start of the closing tag
                    if ix1!=-1 and ix2!=-1 and ix2>ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1+1:ix2]
                        if ' ' not in fieldname and line[ix2+2:-1]==fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname: # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split( None, 1 )
                            if BibleOrgSysGlobals.debugFlag: assert( len(bits)==2 )
                            fieldname = bits[0]
                            attributes = bits[1]
                            if line[ix2+2:-1]==fieldname: # The closing tag must match the opening one
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                if not processed: print( "ERROR: Unexpected {!r} line in SSF file".format( line ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  " + _("Got {} SSF entries:").format( len(settingsDict) ) )
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print( "    {}: {}".format( key, settingsDict[key] ) )
        self.ssfDict = settingsDict # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy() # This will be all the combined settings
    # end of ESFMBible.loadSSFData


    def loadSemanticDictionary( self, BBB, filename ):
        """
        Try to load semantic dictionary entries from the given file (in self.sourceFolder).

        The file is read with ESFMFile and its (marker,text) lines are scanned:
            a 'rem' line starting with 'ESFM ' must contain ' SEM',
                otherwise we give up immediately (and BBB is NOT added to self.dontLoadBook);
            a 'gl' line of the form "<tag> <content>" (where <tag> is a single character
                in ESFM_SEMANTIC_TAGS) creates an empty list at self.semanticDict[tag][content]
                if there isn't one already.

        Afterwards BBB is appended to self.dontLoadBook so it's not loaded as a normal book.
        Nothing is returned.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "    " + _("Loading possible semantic dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' SEM' not in originalText: return
            elif marker == 'gl':
                # Check the length FIRST so that empty or very short lines can't cause an IndexError
                if len(originalText)>2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict: self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]: self.semanticDict[tagMarker][tagContent] = []
                    count += 1 # Counts every valid line (even if the entry already existed)
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} semantic entries added in {} categories".format( count, len(self.semanticDict) ) )
            else: print( "No semantic entries found." )
    # end of ESFMBible.loadSemanticDictionary


    def loadStrongsDictionary( self, BBB, filename ):
        """
        Try to load Strong's dictionary entries from the given file (in self.sourceFolder).

        The file is read with ESFMFile and its (marker,text) lines are scanned:
            a 'rem' line starting with 'ESFM ' must contain ' STR',
                otherwise we give up immediately (and BBB is NOT added to self.dontLoadBook);
            a 'gl' line starting with H or G sets the current tag (that first letter)
                and the current number (the rest of the line);
            an 'html' line is saved at self.StrongsDict[tag][number] for the current tag and number
                (unless that entry already exists).
        An 'html' line that comes before any H/G 'gl' line is skipped with a logged warning.

        Afterwards BBB is appended to self.dontLoadBook so it's not loaded as a normal book.
        Nothing is returned.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "    " + _("Loading possible Strong's dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        tagMarker = sNumber = None # No H/G gl line has been seen yet
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' STR' not in originalText: return
            elif marker == 'gl':
                # Slicing (rather than indexing) means that an empty line can't cause an IndexError
                if originalText[:1] in ('H','G'):
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None: # We have nothing to file this entry under
                    logging.warning( "ESFMBible.loadStrongsDictionary: Ignored html entry before any H/G gl entry in {}".format( filename ) )
                    continue
                dictEntry = originalText
                if tagMarker not in self.StrongsDict: self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]: self.StrongsDict[tagMarker][sNumber] = dictEntry
                count += 1 # Counts every html line (even if the entry already existed)
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} Strong's entries added in {} categories".format( count, len(self.StrongsDict) ) )
            else: print( "No Strong's entries found." )
    # end of ESFMBible.loadStrongsDictionary


    def loadDictionaries( self ):
        """
        Go through the possible (BBB,filename) pairs and hand any dictionary
            "books" to the matching loader:
                XXD goes to loadSemanticDictionary()
                XXE goes to loadStrongsDictionary()
        All other book codes are ignored here.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( "  " + _("Loading any dictionaries...") )
        for bookEntry in self.maximumPossibleFilenameTuples:
            bookCode, bookFilename = bookEntry
            if bookCode == 'XXD':
                self.loadSemanticDictionary( bookCode, bookFilename )
                continue
            if bookCode == 'XXE':
                self.loadStrongsDictionary( bookCode, bookFilename )
    # end of ESFMBible.loadDictionaries


    def loadBook( self, BBB, filename=None ):
        """
        Load one book (unless it's already been loaded or attempted) and save it.

        Nothing happens if BBB is already in self.books or in self.dontLoadBook.
        A second attempt for the same BBB only logs a warning.
        If no filename is given, it's looked up in self.possibleFilenameDict.
        A book that loads with no raw lines is logged and not saved.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "ESFMBible.loadBook( {}, {} )".format( BBB, filename ) )

        # Guard clauses -- nothing to do in any of these cases
        if BBB in self.books or BBB in self.dontLoadBook:
            return # Already loaded (as a book, or as a dictionary)
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading ESFM {} for {}".format( BBB, self.name ) )
            return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True

        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            try:
                print( _("  ESFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
            except UnicodeEncodeError: # Fall back to a message without the names
                print( _("  ESFMBible: Loading {}...").format( BBB ) )

        bookFilename = self.possibleFilenameDict[BBB] if filename is None else filename
        bookObject = ESFMBibleBook( self, BBB )
        bookObject.load( bookFilename, self.sourceFolder )
        if not bookObject._rawLines:
            logging.info( "ESFM book {} was completely blank".format( BBB ) )
            return
        bookObject.validateMarkers() # Usually activates InternalBibleBook.processLines()
        self.saveBook( bookObject )
    # end of ESFMBible.loadBook


    def _loadBookMP( self, BBB_Filename ):
        """
        Multiprocessing version of loadBook().

        Loads and validates the requested book but does NOT save it
            (the caller does that), and returns the book object.
        Returns None if the book code is in self.dontLoadBook.

        BBB_Filename is a 2-tuple containing BBB and the filename
            (the filename actually used is the one from self.possibleFilenameDict).
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( "ESFMBible.loadBookMP( {} )".format( BBB_Filename ) )
        BBB, unusedFilename = BBB_Filename
        assert( BBB not in self.books )
        if BBB in self.dontLoadBook:
            return None

        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("  ESFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )

        bookObject = ESFMBibleBook( self, BBB )
        bookObject.load( self.possibleFilenameDict[BBB], self.sourceFolder )
        bookObject.validateMarkers() # Usually activates InternalBibleBook.processLines()

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("    Finishing loading ESFM book {}.").format( BBB ) )
        return bookObject
    # end of ESFMBible.loadBookMP


    def load( self ):
        """
        Load the dictionaries and then all of the books.

        If BibleOrgSysGlobals.maxProcesses is more than one, the books are loaded
            with a multiprocessing pool and saved afterwards in order,
            otherwise they are loaded (and saved) one by one.
        If there are no possible filenames, a critical error is logged.
        In every case, doPostLoadProcessing() is called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("ESFMBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if not self.maximumPossibleFilenameTuples:
            logging.critical( _("ESFMBible: No books to load in {}!").format( self.sourceFolder ) )
        else:
            # The dictionaries go first (they add themselves to self.dontLoadBook)
            self.loadDictionaries()
            if BibleOrgSysGlobals.maxProcesses > 1: # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print( _("ESFMBible: Loading {} books using {} CPUs...").format( len(self.maximumPossibleFilenameTuples), BibleOrgSysGlobals.maxProcesses ) )
                    print( "  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed." )
                with multiprocessing.Pool( processes=BibleOrgSysGlobals.maxProcesses ) as workerPool: # start worker processes
                    # map() only passes one parameter, hence the (BBB,filename) tuples
                    loadedBooks = workerPool.map( self._loadBookMP, self.maximumPossibleFilenameTuples )
                    assert( len(loadedBooks) == len(self.maximumPossibleFilenameTuples) )
                    for bookObject in loadedBooks: # Saves them in the correct order
                        if bookObject is None: continue # Must have been a dictionary
                        self.saveBook( bookObject )
            else: # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB,filename in self.maximumPossibleFilenameTuples:
                    if BBB in self.dontLoadBook: continue
                    self.loadBook( BBB, filename ) # also saves it

        if 'Tag errors' in self.semanticDict:
            print( "Tag errors:", self.semanticDict['Tag errors'] )
        if 'Missing' in self.semanticDict:
            print( "Missing:", self.semanticDict['Missing'] )
        self.doPostLoadProcessing()
Ejemplo n.º 5
0
    def preload(self):
        """
        Check the source folder and get ready to load books (without loading any).

        Scans self.sourceFolder, loads the settings from a single SSF file
        (if exactly one is found) into self.suppliedMetadata['PTX7']['SSF'],
        and fills self.maximumPossibleFilenameTuples,
        self.possibleFilenameDict and self.availableBBBs.

        Sets self.preloadDone to True on completion. If the folder contains
        no files at all, returns early WITHOUT setting self.preloadDone.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(t("preload() from {}").format(self.sourceFolder))

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error("Not sure what {!r} is in {}!".format(
                    somepath, self.sourceFolder))
        # Subfolders are only reported (not an error) -- some known ones are
        # silently accepted. NOTE: this must be an == comparison, because
        # "x in ('__MACOSX')" is a substring test on a string, not a tuple lookup.
        unexpectedFolders = [
            folderName for folderName in foundFolders
            if not folderName.startswith('Interlinear_')
            and folderName != '__MACOSX'
        ]
        if unexpectedFolders:
            logging.info(
                "ESFMBible.load: Surprised to see subfolders in {!r}: {}".
                format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("ESFMBible: Couldn't find any files in {!r}".format(
                    self.sourceFolder))
            return  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print(self.USFMFilenamesObject)

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        # Attempt to load the SSF file
        self.ssfFilepath = None
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            PTXSettingsDict = loadPTX7ProjectData(self, self.ssfFilepath)
            if PTXSettingsDict:
                if 'PTX7' not in self.suppliedMetadata:
                    self.suppliedMetadata['PTX7'] = {}
                self.suppliedMetadata['PTX7']['SSF'] = PTXSettingsDict
                self.applySuppliedMetadata(
                    'SSF')  # Copy some to BibleObject.settingsDict

        # NOTE: self.name is not set here (the old code that derived it from
        # the given name, the settings, or the folder name is disabled)

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            # assumes self.availableBBBs is a set created elsewhere (not visible here)
            self.availableBBBs.add(BBB)
            self.possibleFilenameDict[BBB] = filename

        self.preloadDone = True
Ejemplo n.º 6
0
def ESFMBibleFileCheck(givenFolderName,
                       strictCheck=True,
                       autoLoad=False,
                       autoLoadBooks=False):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Parameters:
        givenFolderName: path (string) of the folder to examine.
        strictCheck: passed on to USFMBibleFileCheck() and to
            USFMFilenames.getMaximumPossibleFilenameTuples().
        autoLoad: if true and exactly one ESFM Bible is found,
            create and return an ESFMBible object for it.
        autoLoadBooks: like autoLoad, but load() is also called on the
            object before it is returned.

    Returns False if an error is found (the path is unreadable or is not
        a folder, or USFMBibleFileCheck() gives a false result).

    if neither autoLoad nor autoLoadBooks is set (default)
        returns None (nothing found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is set and exactly one ESFM Bible is found,
        returns the ESFMBible object.
    """
    if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
        print("ESFMBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False) and autoLoadBooks in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Check that there's a USFM Bible here first
    from USFMBible import USFMBibleFileCheck
    if not USFMBibleFileCheck(givenFolderName, strictCheck,
                              discountSSF=False):  # no autoloads
        return False

    # str.endswith() accepts a tuple, so one call tests every accepted ending
    acceptedEndings = tuple(filenameEndingsToAccept)

    def acceptedFilenameTuples(filenamesObject):
        """Return the (BBB,filename) 2-tuples that have one of our specific file extensions."""
        return [(BBB, fn) for BBB, fn in
                filenamesObject.getMaximumPossibleFilenameTuples(
                    strictCheck=strictCheck)
                if fn.endswith(acceptedEndings)]

    # Find all the subfolders in this folder (only needed if we look one level down)
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" ESFMBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders = [
        something for something in os.listdir(givenFolderName)
        if os.path.isdir(os.path.join(givenFolderName, something))
        and something != '__MACOSX'  # don't visit these directories
    ]

    # See if there's an ESFMBible project here in this given folder
    UFns = USFMFilenames(
        givenFolderName
    )  # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2: print(UFns)
    filenameTuples = acceptedFilenameTuples(UFns)
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print("  Confirmed:", len(filenameTuples), filenameTuples)
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
        print("  Found {} ESFM file{}.".format(
            len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
    if filenameTuples:  # The given folder itself is the (one) project
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
            print("Got ESFM SSFs: ({}) {}".format(len(SSFs), SSFs))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck got", 1, givenFolderName)
        if autoLoad or autoLoadBooks:
            eB = ESFMBible(givenFolderName)
            if autoLoadBooks: eB.load()  # Load and process the file
            return eB
        return 1

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames(
            tryFolderName
        )  # Assuming they have standard Paratext style filenames
        if BibleOrgSysGlobals.verbosityLevel > 2: print(UFns)
        filenameTuples = acceptedFilenameTuples(UFns)
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("  Confirmed:", len(filenameTuples), filenameTuples)
        if BibleOrgSysGlobals.verbosityLevel > 2 and filenameTuples:
            print("  Found {} ESFM files: {}".format(len(filenameTuples),
                                                     filenameTuples))
        elif BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
            print("  Found {} ESFM file{}".format(
                len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
        if filenameTuples:
            SSFs = UFns.getSSFFilenames(searchAbove=True)
            if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
                print("Got ESFM SSFs: ({}) {}".format(len(SSFs), SSFs))
            foundProjects.append(tryFolderName)

    numFound = len(foundProjects)
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible(foundProjects[0])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
    return None  # No ESFM Bibles found here or one level down
Ejemplo n.º 7
0
def ESFMBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Parameters:
        givenFolderName: path (string) of the folder to examine.
        strictCheck: passed on to USFMBibleFileCheck() and to
            USFMFilenames.getMaximumPossibleFilenameTuples().
        autoLoad: if true and exactly one ESFM Bible is found,
            create and return an ESFMBible object for it.
        autoLoadBooks: like autoLoad, but load() is also called on the
            object before it is returned.

    Returns False if an error is found (the path is unreadable or is not
        a folder, or USFMBibleFileCheck() gives a false result).

    if neither autoLoad nor autoLoadBooks is set (default)
        returns None (nothing found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is set and exactly one ESFM Bible is found,
        returns the ESFMBible object.
    """
    if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
        print( "ESFMBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
        assert givenFolderName and isinstance( givenFolderName, str )
        assert autoLoad in (True,False) and autoLoadBooks in (True,False)

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Check that there's a USFM Bible here first
    from USFMBible import USFMBibleFileCheck
    if not USFMBibleFileCheck( givenFolderName, strictCheck, discountSSF=False ): # no autoloads
        return False

    # str.endswith() accepts a tuple, so one call tests every accepted ending
    acceptedEndings = tuple( filenameEndingsToAccept )

    def acceptedFilenameTuples( filenamesObject ):
        """Return the (BBB,filename) 2-tuples that have one of our specific file extensions."""
        allTuples = filenamesObject.getMaximumPossibleFilenameTuples( strictCheck=strictCheck )
        return [ (BBB,fn) for BBB,fn in allTuples if fn.endswith( acceptedEndings ) ]

    # Find all the subfolders in this folder (only needed if we look one level down)
    if BibleOrgSysGlobals.verbosityLevel > 3: print( " ESFMBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders = []
    for something in os.listdir( givenFolderName ):
        if something == '__MACOSX': continue # don't visit these directories
        if os.path.isdir( os.path.join( givenFolderName, something ) ):
            foundFolders.append( something )

    # See if there's an ESFMBible project here in this given folder
    UFns = USFMFilenames( givenFolderName ) # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
    filenameTuples = acceptedFilenameTuples( UFns )
    if BibleOrgSysGlobals.verbosityLevel > 3: print( "  Confirmed:", len(filenameTuples), filenameTuples )
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples: print( "  Found {} ESFM file{}.".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
    if filenameTuples: # The given folder itself is the (one) project
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got ESFM SSFs: ({}) {}".format( len(SSFs), SSFs ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck got", 1, givenFolderName )
        if autoLoad or autoLoadBooks:
            eB = ESFMBible( givenFolderName )
            if autoLoadBooks: eB.load() # Load and process the file
            return eB
        return 1

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames( tryFolderName ) # Assuming they have standard Paratext style filenames
        if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
        filenameTuples = acceptedFilenameTuples( UFns )
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  Confirmed:", len(filenameTuples), filenameTuples )
        if BibleOrgSysGlobals.verbosityLevel > 2 and filenameTuples: print( "  Found {} ESFM files: {}".format( len(filenameTuples), filenameTuples ) )
        elif BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples: print( "  Found {} ESFM file{}".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
        if filenameTuples:
            SSFs = UFns.getSSFFilenames( searchAbove=True )
            if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got ESFM SSFs: ({}) {}".format( len(SSFs), SSFs ) )
            foundProjects.append( tryFolderName )

    numFound = len( foundProjects )
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible( foundProjects[0] )
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
    return None # No ESFM Bibles found here or one level down
Ejemplo n.º 8
0
    def __init__(self,
                 sourceFolder,
                 givenName=None,
                 givenAbbreviation=None,
                 encoding=None):
        """
        Create the internal USFM Bible object.

        Parameters:
            sourceFolder: folder that is scanned for the Bible files.
            givenName: optional name; if None, the SSF 'FullName' or 'Name'
                setting is tried, then the folder's base name, then "USFM Bible".
            givenAbbreviation: stored as self.abbreviation.
            encoding: optional file encoding; if None, the 'Encoding' entry
                of the SSF file (when one is found) is used to set it.

        Raises FileNotFoundError if no regular files are found in sourceFolder.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    t("__init__: Not sure what '{}' is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            # NOTE: The folder name must be compared for equality here.
            #   ('__MACOSX') is just a parenthesised string (not a tuple),
            #   so using "in" with it would do a substring test and also
            #   skip folders like 'MAC' or '_'.
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName != '__MACOSX'
            ]
            if unexpectedFolders:
                logging.info(
                    t("__init__: Surprised to see subfolders in '{}': {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            errorMessage = t("__init__: Couldn't find any files in '{}'").format(
                self.sourceFolder)
            if Globals.verbosityLevel > 0: print(errorMessage)
            raise FileNotFoundError(errorMessage)  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if Globals.verbosityLevel > 3 or (Globals.debugFlag
                                          and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData(self.ssfFilepath)
            if self.encoding is None and 'Encoding' in self.ssfDict:  # See if the SSF file gives some help to us
                ssfEncoding = self.ssfDict['Encoding']
                if ssfEncoding == '65001': self.encoding = 'utf-8'
                else:
                    if Globals.verbosityLevel > 0:
                        print(
                            t("__init__: File encoding in SSF is set to '{}'").
                            format(ssfEncoding))
                    if ssfEncoding.isdigit():
                        # A numeric value is used as a code page number
                        self.encoding = 'cp' + ssfEncoding
                        if Globals.verbosityLevel > 0:
                            print(
                                t("__init__: Switched to '{}' file encoding").
                                format(self.encoding))
                    else:
                        logging.critical(
                            t("__init__: Unsure how to handle '{}' file encoding"
                              ).format(ssfEncoding))

        # Work out a name: given name, then SSF settings, then folder name, then a default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name'):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
Ejemplo n.º 9
0
class ESFMBible(Bible):
    """
    Class to load and manipulate ESFM Bibles.

    """
    def __init__(self, sourceFolder, givenName=None, givenAbbreviation=None):
        """
        Create the internal ESFM Bible object.

        Parameters:
            sourceFolder: folder that is scanned for the Bible files.
            givenName: optional name; if None, the SSF 'FullName' or 'Name'
                setting is tried, then the folder's base name, then "ESFM Bible".
            givenAbbreviation: stored as self.abbreviation.

        NOTE: If no regular files are found in sourceFolder, this returns
            early, so the attributes that are set after that check
            (e.g., USFMFilenamesObject, name, possibleFilenameDict) don't
            get set by this function.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error("Not sure what {!r} is in {}!".format(
                    somepath, self.sourceFolder))
        if foundFolders:
            # NOTE: The folder name must be compared for equality here.
            #   ('__MACOSX') is just a parenthesised string (not a tuple),
            #   so using "in" with it would do a substring test and also
            #   skip folders like 'MAC' or '_'.
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName != '__MACOSX'
            ]
            if unexpectedFolders:
                logging.info(
                    "ESFMBible.load: Surprised to see subfolders in {!r}: {}".
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("ESFMBible: Couldn't find any files in {!r}".format(
                    self.sourceFolder))
            return  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print(self.USFMFilenamesObject)

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData(self.ssfFilepath)

        # Work out a name: given name, then SSF settings, then folder name, then a default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name'):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name: self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename

        self.dontLoadBook = []  # BBB codes get appended by the dictionary loaders
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}

    # end of ESFMBible.__init__

    def loadSSFData(self, ssfFilepath):
        """
        Process the SSF data from the given filepath.

        The file is read as UTF-8, one field per line, with the fields
            expected between <ScriptureText> and </ScriptureText> lines.
        Each field is stored under its name:
            <Field>contents</Field>             -> contents
            <Field attrs>contents</Field>       -> (contents, attrs)
            <Field/> or <Field />               -> ''
            <Field attrs/> or <Field attrs />   -> ('', attrs)
        Any other non-blank line is reported as unexpected.

        Doesn't return anything -- the results are saved in self.ssfDict
            and a copy of them is saved in self.settingsDict.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading SSF data from {!r}").format(ssfFilepath))
        status, settingsDict = 0, {}  # status: 0=before, 1=inside, 2=after <ScriptureText>
        with open(ssfFilepath, encoding='utf-8'
                  ) as myFile:  # Automatically closes the file when done
            for lineCount, line in enumerate(myFile, start=1):
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "ESFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                        .format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # NOTE: strip() also removes the trailing newline character,
                #   and is safe even if nothing is left after removing the BOM
                line = line.strip()  # Remove leading and trailing whitespace
                if not line: continue  # Just discard blank lines
                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 2
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith(
                        '/>'):  # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[
                        1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if BibleOrgSysGlobals.debugFlag:
                            assert (len(bits) == 2)
                        fieldname = bits[0]
                        attributes = bits[1]
                        #print( "attributes = {!r}".format( attributes) )
                        # A self-closing field has no contents of its own
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1 != -1 and ix2 != -1 and ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        closingName = line[ix2 + 2:-1]
                        if ' ' not in fieldname:
                            if closingName == fieldname:
                                settingsDict[fieldname] = contents
                                processed = True
                        else:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if BibleOrgSysGlobals.debugFlag:
                                assert (len(bits) == 2)
                            fieldname = bits[0]
                            attributes = bits[1]
                            #print( "attributes = {!r}".format( attributes) )
                            if closingName == fieldname:
                                settingsDict[fieldname] = (contents,
                                                           attributes)
                                processed = True
                if not processed:
                    print(
                        "ERROR: Unexpected {!r} line in SSF file".format(line))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + _("Got {} SSF entries:").format(len(settingsDict)))
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print("    {}: {}".format(key, settingsDict[key]))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy(
        )  # This will be all the combined settings

    # end of ESFMBible.loadSSFData

    def loadSemanticDictionary(self, BBB, filename):
        """
        Load the semantic dictionary entries from the given file
            (in self.sourceFolder) into self.semanticDict.

        Each 'gl' line consisting of a tag character from ESFM_SEMANTIC_TAGS,
            then a space, then some content, gets an (empty list) entry at
            self.semanticDict[tag][content] if it's not already there.

        If the file has a 'rem' line starting with 'ESFM ' but without ' SEM'
            in it, we give up straight away (without marking the book).
        Otherwise BBB is appended to self.dontLoadBook.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("    " +
                  _("Loading possible semantic dictionary from {}...").format(
                      filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        for marker, originalText in originalBook.lines:
            #print( marker, repr(originalText) )
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' SEM' not in originalText: return
            elif marker == 'gl':
                # The length must be checked BEFORE indexing
                #   else an empty or one-character line causes an IndexError
                if originalText and len(originalText) > 2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict:
                        self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]:
                        self.semanticDict[tagMarker][tagContent] = []
                    count += 1
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} semantic entries added in {} categories".format(
                    count, len(self.semanticDict)))
            else:
                print("No semantic entries found.")

    # end of ESFMBible.loadSemanticDictionary

    def loadStrongsDictionary(self, BBB, filename):
        """
        Load the Strong's dictionary entries from the given file
            (in self.sourceFolder) into self.StrongsDict.

        A 'gl' line starting with 'H' or 'G' gives the category (that first
            character) and the number (the rest of the line) for the
            following 'html' line(s). Each 'html' line is saved at
            self.StrongsDict[category][number] if there's no entry there yet.

        If the file has a 'rem' line starting with 'ESFM ' but without ' STR'
            in it, we give up straight away (without marking the book).
        Otherwise BBB is appended to self.dontLoadBook.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("    " +
                  _("Loading possible Strong's dictionary from {}...").format(
                      filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        tagMarker = sNumber = None  # Not known until we get a suitable 'gl' line
        for marker, originalText in originalBook.lines:
            #print( marker, repr(originalText) )
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' STR' not in originalText: return
            elif marker == 'gl':
                # Check for empty text first (else the indexing fails)
                if originalText and originalText[0] in 'HG':
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None:  # There's nothing to file this entry under
                    logging.warning(
                        "ESFMBible.loadStrongsDictionary: Ignored html line before any H/G gl line in {}"
                        .format(filename))
                    continue
                dictEntry = originalText
                if tagMarker not in self.StrongsDict:
                    self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]:
                    self.StrongsDict[tagMarker][sNumber] = dictEntry
                count += 1
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} Strong's entries added in {} categories".format(
                    count, len(self.StrongsDict)))
            else:
                print("No Strong's entries found.")

    # end of ESFMBible.loadStrongsDictionary

    def loadDictionaries(self):
        """
        Load the semantic ('XXD') and Strong's ('XXE') dictionaries
            for any such entries in self.maximumPossibleFilenameTuples.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  " + _("Loading any dictionaries..."))
        # Map each special book code to the method that loads it
        dictionaryLoaders = {
            'XXD': self.loadSemanticDictionary,
            'XXE': self.loadStrongsDictionary,
        }
        for BBB, filename in self.maximumPossibleFilenameTuples:
            loader = dictionaryLoaders.get(BBB)
            if loader is not None:
                loader(BBB, filename)

    # end of ESFMBible.loadDictionaries

    def loadBook(self, BBB, filename=None):
        """
        Load and save the requested ESFM book.

        Does nothing if BBB is already in self.books or self.dontLoadBook,
            and only logs a warning if a load was already attempted.
        If no filename is given, it is looked up in self.possibleFilenameDict.
        A book with no raw lines is logged and not saved.
        """
        verbosity = BibleOrgSysGlobals.verbosityLevel
        if verbosity > 2:
            print("ESFMBible.loadBook( {}, {} )".format(BBB, filename))

        # Already loaded, or a dictionary that's already been handled
        if BBB in self.books or BBB in self.dontLoadBook:
            return
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading ESFM {} for {}".format(
                    BBB, self.name))
            return
        self.triedLoadingBook[BBB] = True

        if verbosity > 2 or BibleOrgSysGlobals.debugFlag:
            try:
                print(
                    _("  ESFMBible: Loading {} from {} from {}...").format(
                        BBB, self.name, self.sourceFolder))
            except UnicodeEncodeError:  # Fall back to a shorter message
                print(_("  ESFMBible: Loading {}...").format(BBB))

        bookFilename = self.possibleFilenameDict[BBB] if filename is None else filename
        esfmBook = ESFMBibleBook(self, BBB)
        esfmBook.load(bookFilename, self.sourceFolder)
        if not esfmBook._rawLines:
            logging.info("ESFM book {} was completely blank".format(BBB))
            return
        esfmBook.validateMarkers()  # Usually activates InternalBibleBook.processLines()
        self.saveBook(esfmBook)

    # end of ESFMBible.loadBook

    def _loadBookMP(self, BBB_Filename):
        """
        Multiprocessing version of loadBook.

        Loads and validates the book but does NOT save it
            (as that is not safe for multiprocessing) -- the caller does that.

        Parameter is a 2-tuple containing BBB and the filename
            (although the filename actually used comes from self.possibleFilenameDict).

        Returns the loaded ESFMBibleBook, or None if BBB is in self.dontLoadBook.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("ESFMBible.loadBookMP( {} )".format(BBB_Filename))
        BBB, _unusedFilename = BBB_Filename
        assert (BBB not in self.books)
        if BBB in self.dontLoadBook:
            return None
        self.triedLoadingBook[BBB] = True

        showProgress = BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag
        if showProgress:
            print(
                _("  ESFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        esfmBook = ESFMBibleBook(self, BBB)
        esfmBook.load(self.possibleFilenameDict[BBB], self.sourceFolder)
        esfmBook.validateMarkers()  # Usually activates InternalBibleBook.processLines()
        if showProgress:
            print(_("    Finishing loading ESFM book {}.").format(BBB))
        return esfmBook

    # end of ESFMBible.loadBookMP

    def load(self):
        """
        Load the dictionaries and then all the books.

        Uses a multiprocessing pool if BibleOrgSysGlobals.maxProcesses > 1,
            otherwise loads the books one at a time.
        Afterwards prints any 'Tag errors' and 'Missing' entries from
            self.semanticDict and calls self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                _("ESFMBible: Loading {} from {}...").format(
                    self.name, self.sourceFolder))

        if not self.maximumPossibleFilenameTuples:
            logging.critical(
                _("ESFMBible: No books to load in {}!").format(
                    self.sourceFolder))
        else:
            # The dictionaries must be loaded before the books
            self.loadDictionaries()
            if BibleOrgSysGlobals.maxProcesses > 1:  # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        _("ESFMBible: Loading {} books using {} CPUs...").
                        format(len(self.maximumPossibleFilenameTuples),
                               BibleOrgSysGlobals.maxProcesses))
                    print(
                        "  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                    )
                with multiprocessing.Pool(
                        processes=BibleOrgSysGlobals.maxProcesses) as pool:
                    # Can only pass a single parameter to map, hence the 2-tuples
                    results = pool.map(self._loadBookMP,
                                       self.maximumPossibleFilenameTuples)
                    assert (len(results) == len(
                        self.maximumPossibleFilenameTuples))
                    for workerResult in results:
                        if workerResult is None:  # Nothing was loaded for this entry
                            continue
                        self.saveBook(workerResult)  # Saves them in the correct order
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    if BBB in self.dontLoadBook:
                        continue
                    self.loadBook(BBB, filename)  # also saves it

        # Report anything recorded under these special semantic dictionary keys
        for specialKey, heading in (('Tag errors', "Tag errors:"),
                                    ('Missing', "Missing:")):
            if specialKey in self.semanticDict:
                print(heading, self.semanticDict[specialKey])
        self.doPostLoadProcessing()
Ejemplo n.º 10
0
class USFMBible( Bible ):
    """
    Class to load and manipulate USFM Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
        """
        Create the internal USFM Bible object.

        Checks the contents of sourceFolder, loads the SSF file if exactly one is found
            (possibly taking the file encoding from it if none was given),
            chooses self.name (givenName, else an SSF 'FullName' or 'Name' field,
                else the folder name, else "USFM Bible"),
            and builds self.maximumPossibleFilenameTuples and self.possibleFilenameDict.

        Raises FileNotFoundError if sourceFolder contains no files at all.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( t("__init__: Not sure what '{}' is in {}!").format( somepath, self.sourceFolder ) )
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName.startswith( 'Interlinear_'): continue
                # NOTE: This must be a 1-tuple (with the trailing comma)
                #   -- a plain parenthesised string would do a substring test instead
                if folderName in ('__MACOSX',): continue
                unexpectedFolders.append( folderName )
            if unexpectedFolders:
                logging.info( t("__init__: Surprised to see subfolders in '{}': {}").format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( t("__init__: Couldn't find any files in '{}'").format( self.sourceFolder ) )
            raise FileNotFoundError # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )
            if self.encoding is None and 'Encoding' in self.ssfDict: # See if the SSF file gives some help to us
                ssfEncoding = self.ssfDict['Encoding']
                if ssfEncoding == '65001': self.encoding = 'utf-8'
                else:
                    if Globals.verbosityLevel > 0:
                        print( t("__init__: File encoding in SSF is set to '{}'").format( ssfEncoding ) )
                    if ssfEncoding.isdigit(): # Treat any other number as a code page
                        self.encoding = 'cp' + ssfEncoding
                        if Globals.verbosityLevel > 0:
                            print( t("__init__: Switched to '{}' file encoding").format( self.encoding ) )
                    else:
                        logging.critical( t("__init__: Unsure how to handle '{}' file encoding").format( ssfEncoding ) )


        # Choose a name, falling back through progressively less specific sources
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
    # end of USFMBible.__init_


    def loadSSFData( self, ssfFilepath, encoding=None ):
        """
        Process the SSF data from the given filepath.

        The file is read line by line (default encoding is 'utf-8').
        Fields are only accepted between the <ScriptureText> and </ScriptureText> lines,
            and each must be on a single line, either
                <Field>contents</Field> or <Field attributes>contents</Field>
                or self-closing: <Field/> or <Field attributes />
        Fields without attributes are stored as a contents string
            (the empty string for self-closing fields).
        Fields with attributes are stored as a (contents, attributes) 2-tuple.
        Any line that can't be handled is reported with a printed error message.

        Nothing is returned -- the results are saved in self.ssfDict,
            plus a (shallow) copy in self.settingsDict.
        """
        if Globals.verbosityLevel > 2: print( t("Loading SSF data from '{}' ({})").format( ssfFilepath, encoding ) )
        if encoding is None: encoding = 'utf-8'
        lineCount, status, settingsDict = 0, 0, {}
        with open( ssfFilepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( t("loadSSFData: Detected UTF-16 Byte Order Marker in {}").format( ssfFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker
                line = line.strip() # Remove leading and trailing whitespace (including the newline)
                if not line: continue # Just discard blank lines
                processed = False
                if status==0 and line=="<ScriptureText>":
                    status = 1
                    processed = True
                elif status==1 and line=="</ScriptureText>":
                    status = 2
                    processed = True
                elif status==1 and line[0]=='<' and line.endswith('/>'): # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[1:-2] # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                    else: # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split( None, 1 )
                        if Globals.debugFlag: assert( len(bits)==2 )
                        fieldname = bits[0]
                        attributes = bits[1]
                        #print( "attributes = '{}'".format( attributes) )
                        # A self-closing field has no contents of its own
                        settingsDict[fieldname] = ('', attributes)
                    processed = True
                elif status==1 and line[0]=='<' and line[-1]=='>':
                    # Use find() not index() -- we want -1 (not a ValueError) if the closing tag is missing
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1!=-1 and ix2!=-1 and ix2>ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1+1:ix2]
                        if ' ' not in fieldname and line[ix2+2:-1]==fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname: # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split( None, 1 )
                            if Globals.debugFlag: assert( len(bits)==2 )
                            fieldname = bits[0]
                            attributes = bits[1]
                            #print( "attributes = '{}'".format( attributes) )
                            if line[ix2+2:-1]==fieldname: # The closing tag must match
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                if not processed: print( t("ERROR: Unexpected '{}' line in SSF file").format( line ) )
        if Globals.verbosityLevel > 2:
            print( "  " + t("Got {} SSF entries:").format( len(settingsDict) ) )
            if Globals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    try: print( "    {}: {}".format( key, settingsDict[key] ) )
                    except UnicodeEncodeError: print( "    {}: UNICODE ENCODING ERROR".format( key ) )
        self.ssfDict = settingsDict # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy() # This will be all the combined settings
    # end of USFMBible.loadSSFData


    def loadBook( self, BBB, filename=None ):
        """
        Load and save the requested USFM book.

        Does nothing if BBB is already in self.books,
            and only logs a warning if a load was already attempted.
        If no filename is given, it is looked up in self.possibleFilenameDict.
        A book with no raw lines is logged and not saved.

        Raises FileNotFoundError if no filename can be determined.
        """
        if Globals.verbosityLevel > 2:
            print( "USFMBible.loadBook( {}, {} )".format( BBB, filename ) )
        if BBB in self.books:
            return # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading USFM {} for {}".format( BBB, self.name ) )
            return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True

        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            print( _("  USFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )

        bookFilename = filename
        if bookFilename is None: # Fall back to the filename found for this book code (if any)
            bookFilename = self.possibleFilenameDict.get( BBB )
        if bookFilename is None:
            raise FileNotFoundError( "USFMBible.loadBook: Unable to find file for {}".format( BBB ) )

        usfmBook = USFMBibleBook( self, BBB )
        usfmBook.load( bookFilename, self.sourceFolder, self.encoding )
        if not usfmBook._rawLines:
            logging.info( "USFM book {} was completely blank".format( BBB ) )
            return
        usfmBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
        self.saveBook( usfmBook )
    # end of USFMBible.loadBook


    def _loadBookMP( self, BBB_Filename ):
        """
        Multiprocessing version of loadBook.

        Loads and validates the book but does NOT save it
            (as that is not safe for multiprocessing) -- the caller does that.

        Parameter is a 2-tuple containing BBB and the filename
            (although the filename actually used comes from self.possibleFilenameDict).

        Returns the loaded USFMBibleBook.
        """
        if Globals.verbosityLevel > 3:
            print( t("loadBookMP( {} )").format( BBB_Filename ) )
        BBB, _unusedFilename = BBB_Filename
        assert( BBB not in self.books )
        self.triedLoadingBook[BBB] = True

        showProgress = Globals.verbosityLevel > 2 or Globals.debugFlag
        if showProgress:
            print( '  ' + t("Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
        usfmBook = USFMBibleBook( self, BBB )
        usfmBook.load( self.possibleFilenameDict[BBB], self.sourceFolder, self.encoding )
        usfmBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
        if showProgress:
            print( _("    Finishing loading USFM book {}.").format( BBB ) )
        return usfmBook
    # end of USFMBible.loadBookMP


    def load( self ):
        """
        Load all the books.

        Uses a multiprocessing pool if Globals.maxProcesses > 1,
            otherwise loads the books one at a time.
        Always finishes by calling self.doPostLoadProcessing().
        """
        if Globals.verbosityLevel > 1:
            print( t("Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if not self.maximumPossibleFilenameTuples:
            logging.critical( t("No books to load in {}!").format( self.sourceFolder ) )
        elif Globals.maxProcesses > 1: # Load all the books as quickly as possible
            if Globals.verbosityLevel > 1:
                print( t("Loading {} books using {} CPUs...").format( len(self.maximumPossibleFilenameTuples), Globals.maxProcesses ) )
                print( "  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed." )
            with multiprocessing.Pool( processes=Globals.maxProcesses ) as pool: # start worker processes
                # Can only pass a single parameter to map, hence the 2-tuples
                results = pool.map( self._loadBookMP, self.maximumPossibleFilenameTuples )
                assert( len(results) == len(self.maximumPossibleFilenameTuples) )
                for loadedBook in results:
                    self.saveBook( loadedBook ) # Saves them in the correct order
        else: # Just single threaded
            # Load the books one by one -- assuming that they have regular Paratext style filenames
            for BBB,filename in self.maximumPossibleFilenameTuples:
                self.loadBook( BBB, filename ) # also saves it
        self.doPostLoadProcessing()
Ejemplo n.º 11
0
    def preload( self ):
        """
        Inspect the source folder and get ready for loading books.

        Checks what files and subfolders are in self.sourceFolder,
            creates self.USFMFilenamesObject,
            loads the SSF file (if self.ssfFilepath is still None and one can be found),
            and builds self.maximumPossibleFilenameTuples and self.possibleFilenameDict.

        Raises FileNotFoundError if the folder contains no files at all.
        Sets self.preloadDone to True when finished.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( exp("preload() from {}").format( self.sourceFolder ) )
            assert not self.preloadDone
            assert self.sourceFolder is not None

        # Sort the folder entries into plain files and subfolders
        foundFiles, foundFolders = [], []
        for entryName in os.listdir( self.sourceFolder ):
            entryPath = os.path.join( self.sourceFolder, entryName )
            if os.path.isdir( entryPath ):
                foundFolders.append( entryName )
            elif os.path.isfile( entryPath ):
                foundFiles.append( entryName )
            else:
                logging.error( exp("preload: Not sure what {!r} is in {}!").format( entryPath, self.sourceFolder ) )

        # Interlinear_* and __MACOSX subfolders are expected -- anything else gets a mention in the log
        unexpectedFolders = [ folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' )
                                and folderName not in ('__MACOSX',) ]
        if unexpectedFolders:
            logging.info( exp("preload: Surprised to see subfolders in {!r}: {}").format( self.sourceFolder, unexpectedFolders ) )

        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print( exp("preload: Couldn't find any files in {!r}").format( self.sourceFolder ) )
            raise FileNotFoundError # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        if self.ssfFilepath is None: # it might have been loaded first
            # Attempt to load the SSF file
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
            if len(ssfFilepathList) > 1:
                logging.error( exp("preload: Found multiple possible SSF files -- using first one: {}").format( ssfFilepathList ) )
            if ssfFilepathList: # Seems we found (at least) one
                from PTX7Bible import loadPTX7ProjectData
                PTXSettingsDict = loadPTX7ProjectData( self, ssfFilepathList[0] )
                if PTXSettingsDict:
                    if self.suppliedMetadata is None:
                        self.suppliedMetadata = {}
                    self.suppliedMetadata.setdefault( 'PTX', {} )['SSF'] = PTXSettingsDict
                    self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

        # NOTE: self.name is not set here (the old givenName/SSF/folder-name fallback code was disabled)

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = { BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples }

        self.preloadDone = True
Ejemplo n.º 12
0
class USFMBible( Bible ):
    """
    Class to load and manipulate USFM Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
        """
        Set up the internal USFM Bible object (without reading anything from disk here).

        Note that sourceFolder can be None if we don't know that yet.
        """
        Bible.__init__( self ) # The base class must be set up and initialised first
        self.objectNameString = 'USFM Bible object'
        self.objectTypeString = 'USFM'

        # Now we can set our object variables
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.abbreviation = givenAbbreviation
        self.encoding = encoding

        self.ssfFilepath = None # None means that no SSF file has been loaded yet
    # end of USFMBible.__init_


    def preload( self ):
        """
        Prepare for loading books (without loading any of them).

        Scans self.sourceFolder for files and subfolders,
            creates self.USFMFilenamesObject,
            tries to find and load an SSF file (only if self.ssfFilepath is None),
            and fills self.maximumPossibleFilenameTuples and self.possibleFilenameDict.

        Raises FileNotFoundError if no files at all are found in the folder.
        Sets self.preloadDone to True at the end.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( exp("preload() from {}").format( self.sourceFolder ) )
            assert not self.preloadDone
            assert self.sourceFolder is not None

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for itemName in os.listdir( self.sourceFolder ):
            itemPath = os.path.join( self.sourceFolder, itemName )
            if os.path.isdir( itemPath ):
                foundFolders.append( itemName )
            elif os.path.isfile( itemPath ):
                foundFiles.append( itemName )
            else:
                logging.error( exp("preload: Not sure what {!r} is in {}!").format( itemPath, self.sourceFolder ) )

        # We don't complain about Interlinear_* or __MACOSX subfolders
        unexpectedFolders = [ folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' )
                                and folderName not in ('__MACOSX',) ]
        if unexpectedFolders:
            logging.info( exp("preload: Surprised to see subfolders in {!r}: {}").format( self.sourceFolder, unexpectedFolders ) )

        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print( exp("preload: Couldn't find any files in {!r}").format( self.sourceFolder ) )
            raise FileNotFoundError # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        if self.ssfFilepath is None: # it might have been loaded first
            # Attempt to load the SSF file
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
            if len(ssfFilepathList) > 1:
                logging.error( exp("preload: Found multiple possible SSF files -- using first one: {}").format( ssfFilepathList ) )
            if ssfFilepathList: # Seems we found (at least) one
                from PTX7Bible import loadPTX7ProjectData
                PTXSettingsDict = loadPTX7ProjectData( self, ssfFilepathList[0] )
                if PTXSettingsDict:
                    if self.suppliedMetadata is None:
                        self.suppliedMetadata = {}
                    self.suppliedMetadata.setdefault( 'PTX', {} )['SSF'] = PTXSettingsDict
                    self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

        # NOTE: self.name is not set here (the old givenName/SSF/folder-name fallback code was disabled)

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = { BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples }

        self.preloadDone = True
    # end of USFMBible.preload


    def loadBook( self, BBB, filename=None ):
        """
        Load the requested book into self.books.

        Unless self.bookNeedsReloading[BBB] is set, nothing is done if the book
            is already loaded or if a load was already attempted.
        If no filename is given, it is looked up in self.possibleFilenameDict.
        A book with no raw lines is logged and not stashed.
        Either way, self.bookNeedsReloading[BBB] is False afterwards.

        Raises FileNotFoundError if no filename can be determined.

        NOTE: You should ensure that preload() has been called first.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( "USFMBible.loadBook( {}, {} )".format( BBB, filename ) )
            assert self.preloadDone

        reloadRequested = BBB in self.bookNeedsReloading and self.bookNeedsReloading[BBB]
        if not reloadRequested:
            if BBB in self.books:
                if BibleOrgSysGlobals.debugFlag:
                    print( "  {} is already loaded -- returning".format( BBB ) )
                return # Already loaded
            if BBB in self.triedLoadingBook:
                logging.warning( "We had already tried loading USFM {} for {}".format( BBB, self.name ) )
                return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True

        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print( _("  USFMBible: Loading {} from {} from {}…").format( BBB, self.name, self.sourceFolder ) )

        bookFilename = filename
        if bookFilename is None: # Fall back to the filename found for this book code (if any)
            bookFilename = self.possibleFilenameDict.get( BBB )
        if bookFilename is None:
            raise FileNotFoundError( "USFMBible.loadBook: Unable to find file for {}".format( BBB ) )

        usfmBook = USFMBibleBook( self, BBB )
        usfmBook.load( bookFilename, self.sourceFolder, self.encoding )
        if usfmBook._rawLines:
            usfmBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
            self.stashBook( usfmBook )
        else:
            logging.info( "USFM book {} was completely blank".format( BBB ) )
        self.bookNeedsReloading[BBB] = False
    # end of USFMBible.loadBook


    def _loadBookMP( self, BBB_Filename_tuple ):
        """
        Multiprocessing version of loadBook.

        Loads and validates the book but does NOT stash it
            (as that is not safe for multiprocessing) -- the caller does that.

        Parameter is a 2-tuple containing BBB and the filename
            (although the filename actually used comes from self.possibleFilenameDict).

        Returns the book object
            (which is the existing entry from self.books if it was already loaded).
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( exp("loadBookMP( {} )").format( BBB_Filename_tuple ) )

        BBB, _unusedFilename = BBB_Filename_tuple
        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print( "  {} is already loaded -- returning".format( BBB ) )
            return self.books[BBB] # Already loaded

        # NOTE: An earlier attempt recorded in self.triedLoadingBook doesn't prevent loading here
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False

        showProgress = BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag
        if showProgress:
            print( '  ' + exp("Loading {} from {} from {}…").format( BBB, self.name, self.sourceFolder ) )
        usfmBook = USFMBibleBook( self, BBB )
        usfmBook.load( self.possibleFilenameDict[BBB], self.sourceFolder, self.encoding )
        usfmBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
        if showProgress:
            print( _("    Finishing loading USFM book {}.").format( BBB ) )
        return usfmBook
    # end of USFMBible.loadBookMP


    def loadBooks( self ):
        """
        Load all the Bible books (calling preload() first if that's not been done yet).

        Uses a multiprocessing pool if BibleOrgSysGlobals.maxProcesses > 1,
            otherwise loads the books one at a time.
        Always finishes by calling self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( exp("Loading {} from {}…").format( self.name or self.abbreviation, self.sourceFolder ) )

        if not self.preloadDone:
            self.preload()

        if not self.maximumPossibleFilenameTuples:
            logging.critical( exp("No books to load in {}!").format( self.sourceFolder ) )
        elif BibleOrgSysGlobals.maxProcesses > 1: # Load all the books as quickly as possible
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( _("Loading {} {} books using {} CPUs…").format( len(self.maximumPossibleFilenameTuples), 'USFM', BibleOrgSysGlobals.maxProcesses ) )
                print( _("  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed.") )
            with multiprocessing.Pool( processes=BibleOrgSysGlobals.maxProcesses ) as pool: # start worker processes
                # Can only pass a single parameter to map, hence the 2-tuples
                results = pool.map( self._loadBookMP, self.maximumPossibleFilenameTuples )
                assert len(results) == len(self.maximumPossibleFilenameTuples)
                for loadedBook in results:
                    self.stashBook( loadedBook ) # Saves them in the correct order
        else: # Just single threaded
            # Load the books one by one -- assuming that they have regular Paratext style filenames
            for BBB,filename in self.maximumPossibleFilenameTuples:
                self.loadBook( BBB, filename ) # also saves it
        self.doPostLoadProcessing()
    # end of USFMBible.loadBooks

    def load( self ):
        """
        Load the whole Bible -- simply delegates to loadBooks().
        """
        self.loadBooks()
Ejemplo n.º 13
0
    def preload(self,
                sourceFolder,
                givenName=None,
                givenAbbreviation=None,
                encoding=None):
        """
        Scan the source folder and work out what can be loaded from it.

        Records any supplied name/abbreviation/encoding, checks the folder
        contents, loads the SSF file when exactly one candidate is found
        (unless self.ssfFilepath is already set), chooses self.name, and
        builds the BBB -> filename lookup in self.possibleFilenameDict.

        Parameters:
            sourceFolder: folder containing the USFM files.
            givenName: if truthy, replaces self.givenName.
            givenAbbreviation: if truthy, stored in self.givenAbbreviation.
            encoding: if truthy, replaces self.encoding.

        Raises FileNotFoundError if the folder contains no files at all.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                t("preload( {} {} {} {} )").format(sourceFolder, givenName,
                                                   givenAbbreviation,
                                                   encoding))
        if BibleOrgSysGlobals.debugFlag: assert (sourceFolder)
        self.sourceFolder = sourceFolder
        if givenName: self.givenName = givenName
        # NOTE(review): the constructor is not visible from here -- confirm
        #   that callers read givenAbbreviation (rather than abbreviation).
        if givenAbbreviation: self.givenAbbreviation = givenAbbreviation
        if encoding: self.encoding = encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    t("__init__: Not sure what {!r} is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            # Interlinear_* and __MACOSX subfolders are expected.
            # The name is compared for equality: "in ('__MACOSX')" would be
            #   a substring test, because parentheses alone don't make a tuple.
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName != '__MACOSX'
            ]
            if unexpectedFolders:
                logging.info(
                    t("__init__: Surprised to see subfolders in {!r}: {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(
                    t("__init__: Couldn't find any files in {!r}").format(
                        self.sourceFolder))
            # No use continuing -- give the caller something to report
            raise FileNotFoundError(
                "preload: No files found in {!r}".format(self.sourceFolder))

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        if self.ssfFilepath is None:  # it might have been loaded first
            # Attempt to load the SSF file
            self.ssfDict, self.settingsDict = {}, {}
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
                searchAbove=True, auto=True)
            if len(ssfFilepathList) == 1:  # Seems we found the right one
                self.loadSSFData(ssfFilepathList[0])

        # Choose a name: given name, then SSF settings, then the folder name
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name'):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {
            BBB: filename
            for BBB, filename in self.maximumPossibleFilenameTuples
        }
Ejemplo n.º 14
0
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.

    Constructing with a source folder runs preload() straight away;
    load() then reads all the books, or loadBook() reads just one.
    """
    def __init__(self,
                 sourceFolder,
                 givenName=None,
                 givenAbbreviation=None,
                 encoding=None):
        """
        Create the internal USFM Bible object.

        Note that sourceFolder can be None if we don't know that yet
        (preload() can then be called later with the folder).
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        self.ssfFilepath, self.ssfDict, self.settingsDict = None, {}, {}
        if sourceFolder is not None:
            self.preload(sourceFolder)

    # end of USFMBible.__init__

    def preload(self,
                sourceFolder,
                givenName=None,
                givenAbbreviation=None,
                encoding=None):
        """
        Loads the SSF file if it can be found.
        Tries to determine USFM filename pattern.

        Also records any supplied name/abbreviation/encoding, chooses
        self.name, and builds the BBB -> filename lookup in
        self.possibleFilenameDict.

        Parameters:
            sourceFolder: folder containing the USFM files.
            givenName: if truthy, replaces self.givenName.
            givenAbbreviation: if truthy, replaces self.abbreviation
                (and is also kept in self.givenAbbreviation).
            encoding: if truthy, replaces self.encoding.

        Raises FileNotFoundError if the folder contains no files at all.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                t("preload( {} {} {} {} )").format(sourceFolder, givenName,
                                                   givenAbbreviation,
                                                   encoding))
        if BibleOrgSysGlobals.debugFlag: assert (sourceFolder)
        self.sourceFolder = sourceFolder
        if givenName: self.givenName = givenName
        if givenAbbreviation:
            # __init__ keeps the abbreviation in self.abbreviation, so that
            #   is updated too; givenAbbreviation is still set in case
            #   existing code reads it.
            self.givenAbbreviation = givenAbbreviation
            self.abbreviation = givenAbbreviation
        if encoding: self.encoding = encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    t("__init__: Not sure what {!r} is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            # Interlinear_* and __MACOSX subfolders are expected.
            # The name is compared for equality: "in ('__MACOSX')" would be
            #   a substring test, because parentheses alone don't make a tuple.
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName != '__MACOSX'
            ]
            if unexpectedFolders:
                logging.info(
                    t("__init__: Surprised to see subfolders in {!r}: {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(
                    t("__init__: Couldn't find any files in {!r}").format(
                        self.sourceFolder))
            # No use continuing -- give the caller something to report
            raise FileNotFoundError(
                "USFMBible.preload: No files found in {!r}".format(
                    self.sourceFolder))

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        if self.ssfFilepath is None:  # it might have been loaded first
            # Attempt to load the SSF file
            self.ssfDict, self.settingsDict = {}, {}
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
                searchAbove=True, auto=True)
            if len(ssfFilepathList) == 1:  # Seems we found the right one
                self.loadSSFData(ssfFilepathList[0])

        # Choose a name: given name, then SSF settings, then the folder name
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name'):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {
            BBB: filename
            for BBB, filename in self.maximumPossibleFilenameTuples
        }

    # end of USFMBible.preload

    def loadSSFData(self, ssfFilepath, encoding=None):
        """
        Process the SSF data from the given filepath.

        Only a simple line-oriented form is understood: each field must
        sit on one line between the <ScriptureText> and </ScriptureText>
        lines, either as <Name>contents</Name> or as a self-closing
        <Name/>, optionally with attributes after the name. Any other
        line (including a field that spans several lines) is reported as
        unexpected and skipped.

        Fields without attributes are stored as strings; fields with
        attributes are stored as (contents, attributes) 2-tuples where
        attributes is the raw attribute text.

        Parameters:
            ssfFilepath: path of the SSF file to read.
            encoding: encoding used to open the file ('utf-8' if None).

        The fields are saved in self.ssfDict, with a copy in
        self.settingsDict, and self.ssfFilepath is updated. If
        self.encoding is still None and the file has an 'Encoding' field,
        self.encoding is set from it. Nothing is returned.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                t("Loading SSF data from {!r} ({})").format(
                    ssfFilepath, encoding))
        if encoding is None: encoding = 'utf-8'
        self.ssfFilepath = ssfFilepath
        # status: 0 = before <ScriptureText>, 1 = inside it, 9 = finished
        lineCount, status, settingsDict = 0, 0, {}
        with open(ssfFilepath, encoding=encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        t("loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                          ).format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # strip() removes the trailing newline as well as other
                #   whitespace, and is safe even if only a BOM was on the line
                line = line.strip()
                if not line: continue  # Just discard blank lines
                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 9
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith(
                        '/>'):  # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[
                        1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if BibleOrgSysGlobals.debugFlag:
                            assert (len(bits) == 2)
                        fieldname = bits[0]
                        attributes = bits[1]
                        # A self-closing field has no contents of its own
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1 != -1 and ix2 != -1 and ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        # line[ix2+2:-1] is the name in the closing tag
                        if ' ' not in fieldname and line[ix2 +
                                                         2:-1] == fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if BibleOrgSysGlobals.debugFlag:
                                assert (len(bits) == 2)
                            fieldname = bits[0]
                            attributes = bits[1]
                            if line[ix2 + 2:-1] == fieldname:
                                settingsDict[fieldname] = (contents,
                                                           attributes)
                                processed = True
                if not processed:
                    print(
                        _("ERROR: Unexpected {} line in SSF file").format(
                            repr(line)))
        if status == 0:
            logging.error("SSF file was empty: {}".format(self.ssfFilepath))
            status = 9
        if BibleOrgSysGlobals.debugFlag: assert (status == 9)
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + t("Got {} SSF entries:").format(len(settingsDict)))
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    try:
                        print("    {}: {}".format(key, settingsDict[key]))
                    except UnicodeEncodeError:
                        print("    {}: UNICODE ENCODING ERROR".format(key))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy(
        )  # This will be all the combined settings

        # Determine our encoding while we're at it
        if self.encoding is None and 'Encoding' in self.ssfDict:  # See if the SSF file gives some help to us
            ssfEncoding = self.ssfDict['Encoding']
            if ssfEncoding == '65001': self.encoding = 'utf-8'
            else:
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print(
                        t("__init__: File encoding in SSF is set to {!r}").
                        format(ssfEncoding))
                if ssfEncoding.isdigit():
                    # Treat any other number as a code page number
                    self.encoding = 'cp' + ssfEncoding
                    if BibleOrgSysGlobals.verbosityLevel > 0:
                        print(
                            t("__init__: Switched to {!r} file encoding").
                            format(self.encoding))
                else:
                    logging.critical(
                        t("__init__: Unsure how to handle {!r} file encoding").
                        format(ssfEncoding))

    # end of USFMBible.loadSSFData

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book into self.books if it's not already loaded.

        Parameters:
            BBB: book code of the book to load.
            filename: file to load it from; if None, the filename found
                by preload() for this BBB is used.

        A book is only attempted once: later calls for the same BBB just
        log a warning. A book whose file turns out to be blank is not saved.

        Raises FileNotFoundError if no filename is known for the book.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))
        if BBB in self.books: return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading USFM {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _("  USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        if filename is None and BBB in self.possibleFilenameDict:
            filename = self.possibleFilenameDict[BBB]
        if filename is None:
            raise FileNotFoundError(
                "USFMBible.loadBook: Unable to find file for {}".format(BBB))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(filename, self.sourceFolder, self.encoding)
        if UBB._rawLines:
            UBB.validateMarkers(
            )  # Usually activates InternalBibleBook.processLines()
            self.saveBook(UBB)
        else:
            logging.info("USFM book {} was completely blank".format(BBB))

    # end of USFMBible.loadBook

    def _loadBookMP(self, BBB_Filename):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.
            (The file actually loaded is the one that preload() recorded
            for the BBB in self.possibleFilenameDict.)

        Returns the loaded USFMBibleBook object for the caller to save.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(t("loadBookMP( {} )").format(BBB_Filename))
        BBB, filename = BBB_Filename
        assert (BBB not in self.books)
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print('  ' + t("Loading {} from {} from {}...").format(
                BBB, self.name, self.sourceFolder))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(self.possibleFilenameDict[BBB], self.sourceFolder,
                 self.encoding)
        UBB.validateMarkers(
        )  # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_("    Finishing loading USFM book {}.").format(BBB))
        return UBB

    # end of USFMBible.loadBookMP

    def load(self):
        """
        Load all the books.

        If BibleOrgSysGlobals.maxProcesses is greater than one, the books
        are loaded by a multiprocessing pool (using _loadBookMP) and then
        saved, in order, by this process. Otherwise they are loaded (and
        saved) one at a time by loadBook().

        doPostLoadProcessing() is always called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                t("Loading {} from {}...").format(self.name,
                                                  self.sourceFolder))

        if self.maximumPossibleFilenameTuples:
            if BibleOrgSysGlobals.maxProcesses > 1:  # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        t("Loading {} books using {} CPUs...").format(
                            len(self.maximumPossibleFilenameTuples),
                            BibleOrgSysGlobals.maxProcesses))
                    print(
                        "  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                    )
                with multiprocessing.Pool(
                        processes=BibleOrgSysGlobals.maxProcesses
                ) as pool:  # start worker processes
                    results = pool.map(self._loadBookMP,
                                       self.maximumPossibleFilenameTuples
                                       )  # have the pool do our loads
                    assert (len(results) == len(
                        self.maximumPossibleFilenameTuples))
                    for bBook in results:
                        self.saveBook(bBook)  # Saves them in the correct order
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    self.loadBook(BBB, filename)  # also saves it
        else:
            logging.critical(
                t("No books to load in {}!").format(self.sourceFolder))
        self.doPostLoadProcessing()
Ejemplo n.º 15
0
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.

    The constructor only records its parameters; preload() scans the
    source folder, and loadBooks() (or load()) reads all the books,
    running preload() first if that hasn't been done.
    """
    def __init__(self,
                 sourceFolder,
                 givenName=None,
                 givenAbbreviation=None,
                 encoding=None):
        """
        Create the internal USFM Bible object.

        Note that sourceFolder can be None if we don't know that yet.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'USFM Bible object'
        self.objectTypeString = 'USFM'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        self.ssfFilepath = None

    # end of USFMBible.__init__

    def preload(self):
        """
        Loads the SSF file if it can be found.
        Tries to determine USFM filename pattern.

        Fills in self.maximumPossibleFilenameTuples,
        self.possibleFilenameDict and self.availableBBBs, and sets
        self.preloadDone when finished.

        Raises FileNotFoundError if the source folder contains no files.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(exp("preload() from {}").format(self.sourceFolder))
            # NOTE(review): these checks also run when only the verbosity
            #   is raised (not just in debug mode) -- confirm that's intended.
            assert not self.preloadDone
            assert self.sourceFolder is not None

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    exp("preload: Not sure what {!r} is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            # Interlinear_* and __MACOSX subfolders are expected
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName not in ('__MACOSX', )
            ]
            if unexpectedFolders:
                logging.info(
                    exp("preload: Surprised to see subfolders in {!r}: {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(
                    exp("preload: Couldn't find any files in {!r}").format(
                        self.sourceFolder))
            # No use continuing -- give the caller something to report
            raise FileNotFoundError(
                "USFMBible.preload: No files found in {!r}".format(
                    self.sourceFolder))

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        if self.ssfFilepath is None:  # it might have been loaded first
            # Attempt to load the SSF file
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
                searchAbove=True, auto=True)
            if len(ssfFilepathList) > 1:
                logging.error(
                    exp("preload: Found multiple possible SSF files -- using first one: {}"
                        ).format(ssfFilepathList))
            if len(ssfFilepathList) >= 1:  # Seems we found the right one
                from PTX7Bible import loadPTX7ProjectData
                PTXSettingsDict = loadPTX7ProjectData(self, ssfFilepathList[0])
                if PTXSettingsDict:
                    # Checked again because the loader above was handed self
                    if self.suppliedMetadata is None:
                        self.suppliedMetadata = {}
                    if 'PTX7' not in self.suppliedMetadata:
                        self.suppliedMetadata['PTX7'] = {}
                    self.suppliedMetadata['PTX7']['SSF'] = PTXSettingsDict
                    self.applySuppliedMetadata(
                        'SSF')  # Copy some to BibleObject.settingsDict

        #self.name = self.givenName
        #if self.name is None:
        #for field in ('FullName','Name',):
        #if field in self.settingsDict: self.name = self.settingsDict[field]; break
        #if not self.name: self.name = os.path.basename( self.sourceFolder )
        #if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        #if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.availableBBBs.add(BBB)
            self.possibleFilenameDict[BBB] = filename

        self.preloadDone = True

    # end of USFMBible.preload

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book into self.books if it's not already loaded.

        Parameters:
            BBB: book code of the book to load.
            filename: file to load it from; if None, the filename found
                by preload() for this BBB is used.

        Unless self.bookNeedsReloading[BBB] is set, a book that is already
        loaded (or has already been attempted) is not loaded again.
        A book whose file turns out to be blank is not stashed.

        Raises FileNotFoundError if no filename is known for the book.

        NOTE: You should ensure that preload() has been called first.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))
            assert self.preloadDone

        if BBB not in self.bookNeedsReloading or not self.bookNeedsReloading[
                BBB]:
            if BBB in self.books:
                if BibleOrgSysGlobals.debugFlag:
                    print("  {} is already loaded -- returning".format(BBB))
                return  # Already loaded
            if BBB in self.triedLoadingBook:
                logging.warning(
                    "We had already tried loading USFM {} for {}".format(
                        BBB, self.name))
                return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True

        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _("  USFMBible: Loading {} from {} from {}…").format(
                    BBB, self.name, self.sourceFolder))
        if filename is None and BBB in self.possibleFilenameDict:
            filename = self.possibleFilenameDict[BBB]
        if filename is None:
            raise FileNotFoundError(
                "USFMBible.loadBook: Unable to find file for {}".format(BBB))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(filename, self.sourceFolder, self.encoding)
        if UBB._rawLines:
            UBB.validateMarkers(
            )  # Usually activates InternalBibleBook.processLines()
            self.stashBook(UBB)
        else:
            logging.info("USFM book {} was completely blank".format(BBB))
        self.bookNeedsReloading[BBB] = False

    # end of USFMBible.loadBook

    def _loadBookMP(self, BBB_Filename_duple):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.
            (The file actually loaded is the one that preload() recorded
            for the BBB in self.possibleFilenameDict.)

        Returns the book info.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(exp("loadBookMP( {} )").format(BBB_Filename_duple))

        BBB, filename = BBB_Filename_duple
        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print("  {} is already loaded -- returning".format(BBB))
            return self.books[BBB]  # Already loaded
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print('  ' + exp("Loading {} from {} from {}…").format(
                BBB, self.name, self.sourceFolder))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(self.possibleFilenameDict[BBB], self.sourceFolder,
                 self.encoding)
        UBB.validateMarkers(
        )  # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_("    Finishing loading USFM book {}.").format(BBB))
        return UBB

    # end of USFMBible.loadBookMP

    def loadBooks(self):
        """
        Load all the Bible books.

        preload() is run first if it hasn't been done yet. If more than
        one process is allowed and we are not already multiprocessing,
        the books are loaded by a multiprocessing pool (using
        _loadBookMP) and then stashed, in order, by this process.
        Otherwise they are loaded (and stashed) one at a time by loadBook().

        doPostLoadProcessing() is always called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                exp("Loading {} from {}…").format(self.getAName(),
                                                  self.sourceFolder))

        if not self.preloadDone: self.preload()

        if self.maximumPossibleFilenameTuples:
            if BibleOrgSysGlobals.maxProcesses > 1 \
            and not BibleOrgSysGlobals.alreadyMultiprocessing: # Get our subprocesses ready and waiting for work
                # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        _("Loading {} {} books using {} CPUs…").format(
                            len(self.maximumPossibleFilenameTuples), 'USFM',
                            BibleOrgSysGlobals.maxProcesses))
                    print(
                        _("  NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                          ))
                BibleOrgSysGlobals.alreadyMultiprocessing = True
                try:
                    with multiprocessing.Pool(
                            processes=BibleOrgSysGlobals.maxProcesses
                    ) as pool:  # start worker processes
                        results = pool.map(self._loadBookMP,
                                           self.maximumPossibleFilenameTuples
                                           )  # have the pool do our loads
                        assert len(results) == len(
                            self.maximumPossibleFilenameTuples)
                        for bBook in results:
                            self.stashBook(
                                bBook)  # Saves them in the correct order
                finally:
                    # Always clear the flag, even if a load failed, so that
                    #   later loads aren't left permanently single-threaded
                    BibleOrgSysGlobals.alreadyMultiprocessing = False
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    self.loadBook(BBB, filename)  # also saves it
        else:
            logging.critical(
                exp("No books to load in {}!").format(self.sourceFolder))
        self.doPostLoadProcessing()

    # end of USFMBible.loadBooks

    def load(self):
        """
        Load the whole Bible -- simply delegates to loadBooks().
        """
        self.loadBooks()
Ejemplo n.º 16
0
def ESFMBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Parameters:
        givenFolderName: (non-empty string) path of the folder to search.
        strictCheck: accepted for interface compatibility but not used by this function.
        autoLoad: if true and exactly one ESFM Bible is found, create and return the ESFMBible object.
        autoLoadBooks: like autoLoad, but load() is also called on the object before it is returned.

    Returns False if an error is found (the given folder is unreadable or is not a folder).

    if autoLoad and autoLoadBooks are both false (default)
        returns None (nothing was found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one ESFM Bible is found,
        returns the ESFMBible object.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck( {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad ) )
    if BibleOrgSysGlobals.debugFlag: assert( givenFolderName and isinstance( givenFolderName, str ) )
    if BibleOrgSysGlobals.debugFlag: assert( autoLoad in (True,False,) and autoLoadBooks in (True,False) )

    def getAcceptedFilenameTuples( UFns ):
        """
        Return the (BBB,filename) 2-tuples from the given USFMFilenames object
            whose filename ends with one of the entries in filenameEndingsToAccept.
        """
        # NOTE(review): this comparison is case-sensitive -- confirm that's what is wanted
        return [(BBB,fn) for BBB,fn in UFns.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
                    if any( fn.endswith( fna ) for fna in filenameEndingsToAccept )]
    # end of getAcceptedFilenameTuples

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Find all the subfolders in this folder (only visited later if nothing is found at this level)
    if BibleOrgSysGlobals.verbosityLevel > 3: print( " ESFMBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders = [something for something in os.listdir( givenFolderName )
                        if something != '__MACOSX' # don't visit these directories
                        and os.path.isdir( os.path.join( givenFolderName, something ) )]

    # See if there's an ESFMBible project here in this given folder
    UFns = USFMFilenames( givenFolderName ) # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
    filenameTuples = getAcceptedFilenameTuples( UFns ) # Only accept our specific file extensions
    if BibleOrgSysGlobals.verbosityLevel > 3: print( "  Confirmed:", len(filenameTuples), filenameTuples )
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples: print( "  Found {} ESFM file{}.".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
    if filenameTuples:
        numFound = 1 # A folder can only hold one project at this level
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got SSFs:", SSFs )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck got", numFound, givenFolderName )
        if autoLoad or autoLoadBooks:
            eB = ESFMBible( givenFolderName )
            if autoLoadBooks: eB.load() # Load and process the file
            return eB
        return numFound

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "    ESFMBibleFileCheck: Looking for files in {}".format( tryFolderName ) )

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames( tryFolderName ) # Assuming they have standard Paratext style filenames
        if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
        filenameTuples = getAcceptedFilenameTuples( UFns ) # Only accept our specific file extensions
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  Confirmed:", len(filenameTuples), filenameTuples )
        if BibleOrgSysGlobals.verbosityLevel > 2 and filenameTuples: print( "  Found {} ESFM files: {}".format( len(filenameTuples), filenameTuples ) )
        elif BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples: print( "  Found {} ESFM file{}".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
        if filenameTuples:
            SSFs = UFns.getSSFFilenames( searchAbove=True )
            if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got SSFs:", SSFs )
            foundProjects.append( tryFolderName )
    if foundProjects:
        numFound = len( foundProjects )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible( foundProjects[0] )
            if autoLoadBooks: eB.load() # Load and process the file
            return eB
        return numFound
    return None # Nothing was found at either level
Ejemplo n.º 17
0
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
        """
        Create the internal USFM Bible object.

        Parameters:
            sourceFolder: the folder that is listed to find the book files.
            givenName: optional name (if None, the name is taken from the SSF settings
                or else derived from the folder name).
            givenAbbreviation: stored as self.abbreviation.
            encoding: file encoding (if None, the 'Encoding' field of the SSF file
                is used when it's available).

        Raises FileNotFoundError if there are no files in sourceFolder.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( t("__init__: Not sure what '{}' is in {}!").format( somepath, self.sourceFolder ) )
        # NOTE: The MACOSX folder must be compared with == because ('__MACOSX') is just a string
        #   (not a tuple) so the 'in' operator would do a substring match
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( t("__init__: Surprised to see subfolders in '{}': {}").format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            errorMessage = t("__init__: Couldn't find any files in '{}'").format( self.sourceFolder )
            if Globals.verbosityLevel > 0: print( errorMessage )
            raise FileNotFoundError( errorMessage ) # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )
            if self.encoding is None and 'Encoding' in self.ssfDict: # See if the SSF file gives some help to us
                ssfEncoding = self.ssfDict['Encoding']
                if ssfEncoding == '65001': self.encoding = 'utf-8'
                else:
                    if Globals.verbosityLevel > 0:
                        print( t("__init__: File encoding in SSF is set to '{}'").format( ssfEncoding ) )
                    if ssfEncoding.isdigit(): # Any other number is treated as a code page number
                        self.encoding = 'cp' + ssfEncoding
                        if Globals.verbosityLevel > 0:
                            print( t("__init__: Switched to '{}' file encoding").format( self.encoding ) )
                    else:
                        logging.critical( t("__init__: Unsure how to handle '{}' file encoding").format( ssfEncoding ) )

        # Work out the name: given name, then SSF settings, then folder name, then a fixed default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples}
Ejemplo n.º 18
0
def ESFMBibleFileCheck(givenFolderName,
                       strictCheck=True,
                       autoLoad=False,
                       autoLoadBooks=False):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Parameters:
        givenFolderName: (non-empty string) path of the folder to search.
        strictCheck: accepted for interface compatibility but not used by this function.
        autoLoad: if true and exactly one ESFM Bible is found, create and return the ESFMBible object.
        autoLoadBooks: like autoLoad, but load() is also called on the object before it is returned.

    Returns False if an error is found (the given folder is unreadable or is not a folder).

    if autoLoad and autoLoadBooks are both false (default)
        returns None (nothing was found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one ESFM Bible is found,
        returns the ESFMBible object.
    """
    verbosity = BibleOrgSysGlobals.verbosityLevel
    if verbosity > 2:
        print("ESFMBibleFileCheck( {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert (givenFolderName and isinstance(givenFolderName, str))
    if BibleOrgSysGlobals.debugFlag:
        assert (autoLoad in (True, False) and autoLoadBooks in (True, False))

    def getAcceptedFilenameTuples(UFns):
        """
        Return the (BBB,filename) 2-tuples from the given USFMFilenames object
            whose filename ends with one of the entries in filenameEndingsToAccept.
        """
        # NOTE(review): this comparison is case-sensitive -- confirm that's what is wanted
        return [
            (BBB, fn)
            for BBB, fn in UFns.getMaximumPossibleFilenameTuples()
            if any(fn.endswith(fna) for fna in filenameEndingsToAccept)
        ]

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Find all the subfolders in this folder (only visited later if nothing is found at this level)
    if verbosity > 3:
        print(" ESFMBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders = [
        something for something in os.listdir(givenFolderName)
        if something != '__MACOSX'  # don't visit these directories
        and os.path.isdir(os.path.join(givenFolderName, something))
    ]

    # See if there's an ESFMBible project here in this given folder
    UFns = USFMFilenames(
        givenFolderName
    )  # Assuming they have standard Paratext style filenames
    if verbosity > 2: print(UFns)
    filenameTuples = getAcceptedFilenameTuples(
        UFns)  # Only accept our specific file extensions
    if verbosity > 3:
        print("  Confirmed:", len(filenameTuples), filenameTuples)
    if verbosity > 1 and filenameTuples:
        print("  Found {} ESFM file{}.".format(
            len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
    if filenameTuples:
        numFound = 1  # A folder can only hold one project at this level
        SSFs = UFns.getSSFFilenames()
        if SSFs and verbosity > 2: print("Got SSFs:", SSFs)
        if verbosity > 2:
            print("ESFMBibleFileCheck got", numFound, givenFolderName)
        if autoLoad or autoLoadBooks:
            eB = ESFMBible(givenFolderName)
            if autoLoadBooks: eB.load()  # Load and process the file
            return eB
        return numFound

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        if verbosity > 3:
            print("    ESFMBibleFileCheck: Looking for files in {}".format(
                tryFolderName))

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames(
            tryFolderName
        )  # Assuming they have standard Paratext style filenames
        if verbosity > 2: print(UFns)
        filenameTuples = getAcceptedFilenameTuples(
            UFns)  # Only accept our specific file extensions
        if verbosity > 3:
            print("  Confirmed:", len(filenameTuples), filenameTuples)
        if verbosity > 2 and filenameTuples:
            print("  Found {} ESFM files: {}".format(len(filenameTuples),
                                                     filenameTuples))
        elif verbosity > 1 and filenameTuples:
            print("  Found {} ESFM file{}".format(
                len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
        if filenameTuples:
            SSFs = UFns.getSSFFilenames(searchAbove=True)
            if SSFs and verbosity > 2:
                print("Got SSFs:", SSFs)
            foundProjects.append(tryFolderName)
    if foundProjects:
        numFound = len(foundProjects)
        if verbosity > 2:
            print("ESFMBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible(foundProjects[0])
            if autoLoadBooks: eB.load()  # Load and process the file
            return eB
        return numFound
    return None  # Nothing was found at either level
Ejemplo n.º 19
0
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.

    """
    def __init__(self,
                 sourceFolder,
                 givenName=None,
                 givenAbbreviation=None,
                 encoding='utf-8'):
        """
        Create the internal USFM Bible object.

        Parameters:
            sourceFolder: the folder that is listed to find the book files.
            givenName: optional name (if None, the name is taken from the SSF
                settings or else derived from the folder name).
            givenAbbreviation: stored as self.abbreviation.
            encoding: stored as self.encoding.

        NOTE: If there are no files in sourceFolder, this returns early, i.e.,
            before USFMFilenamesObject, name, maximumPossibleFilenameTuples
            and possibleFilenameDict have been set.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error("Not sure what '{}' is in {}!".format(
                    somepath, self.sourceFolder))
        # NOTE: The MACOSX folder must be compared with == because ('__MACOSX')
        #   is just a string (not a tuple) so 'in' would do a substring match
        unexpectedFolders = [
            folderName for folderName in foundFolders
            if not folderName.startswith('Interlinear_')
            and folderName != '__MACOSX'
        ]
        if unexpectedFolders:
            logging.info(
                "USFMBible.load: Surprised to see subfolders in '{}': {}".
                format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if Globals.verbosityLevel > 0:
                print("USFMBible: Couldn't find any files in '{}'".format(
                    self.sourceFolder))
            return  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if Globals.verbosityLevel > 3 or (Globals.debugFlag
                                          and debuggingThisModule):
            print(self.USFMFilenamesObject)

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData(self.ssfFilepath)

        # Work out the name: given name, then SSF settings, then folder name, then a fixed default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name'):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {
            BBB: filename
            for BBB, filename in self.maximumPossibleFilenameTuples
        }

    # end of USFMBible.__init__

    def loadSSFData(self, ssfFilepath, encoding='utf-8'):
        """
        Process the SSF data from the given filepath.

        Only lines between <ScriptureText> and </ScriptureText> are accepted.
        Each one must be either <Field>contents</Field> or a self-closing
            <Field/> (which gives empty contents). If the opening tag also
            contains attributes, a (contents, attributes) 2-tuple is stored
            for that field instead of just the contents string.
        Any other non-blank line is reported with an ERROR message and skipped.

        Nothing is returned: the settings are saved in self.ssfDict
            and a copy of them is saved in self.settingsDict.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading SSF data from '{}'").format(ssfFilepath))
        lineCount, status, settingsDict = 0, 0, {}
        with open(ssfFilepath, encoding=encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "USFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                        .format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # Remove the trailing newline plus any leading and trailing whitespace
                #   (No indexing here because the line can be empty once the BOM is removed)
                line = line.strip()
                if not line: continue  # Just discard blank lines
                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 2
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith(
                        '/>'):  # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[
                        1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if Globals.debugFlag: assert (len(bits) == 2)
                        if len(bits) == 2:
                            fieldname, attributes = bits
                            # A self-closing field has no contents of its own
                            settingsDict[fieldname] = ('', attributes)
                            processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    # Use find() here (not index() which raises ValueError)
                    #   so that a line without a closing tag just gets reported below
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1 != -1 and ix2 != -1 and ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        closingName = line[ix2 + 2:-1]
                        if ' ' not in fieldname:
                            if closingName == fieldname:
                                settingsDict[fieldname] = contents
                                processed = True
                        else:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if Globals.debugFlag: assert (len(bits) == 2)
                            if len(bits) == 2:
                                fieldname, attributes = bits
                                if closingName == fieldname:
                                    settingsDict[fieldname] = (contents,
                                                               attributes)
                                    processed = True
                if not processed:
                    print(
                        "ERROR: Unexpected '{}' line in SSF file".format(line))
        if Globals.verbosityLevel > 2:
            print("  " + _("Got {} SSF entries:").format(len(settingsDict)))
            if Globals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print("    {}: {}".format(key, settingsDict[key]))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy(
        )  # This will be all the combined settings

    # end of USFMBible.loadSSFData

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book unless it's already loaded
            or we have already tried to load it.

        If no filename is given, it's looked up in self.possibleFilenameDict.
        The loaded book is saved with self.saveBook() unless it has no raw lines,
            in which case only an info message is logged.
        """
        if Globals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))

        if BBB in self.books:
            return  # Already loaded
        alreadyAttempted = BBB in self.triedLoadingBook
        if alreadyAttempted:
            warningMessage = "We had already tried loading USFM {} for {}".format(
                BBB, self.name)
            logging.warning(warningMessage)
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True

        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            progressMessage = _(
                "  USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder)
            print(progressMessage)

        bookFilename = self.possibleFilenameDict[BBB] if filename is None else filename
        usfmBook = USFMBibleBook(self.name, BBB)
        usfmBook.load(bookFilename, self.sourceFolder, self.encoding)
        if not usfmBook._rawLines:
            logging.info("USFM book {} was completely blank".format(BBB))
            return
        # Usually activates InternalBibleBook.processLines()
        usfmBook.validateMarkers()
        self.saveBook(usfmBook)

    # end of USFMBible.loadBook

    def loadBookMP(self, BBB):
        """
        Multiprocessing version!
        Load the requested book (which must not be in self.books yet)
            and return it -- nothing is saved here.

        The filename always comes from self.possibleFilenameDict.
        """
        if Globals.verbosityLevel > 2:
            print("USFMBible.loadBookMP( {} )".format(BBB))
        assert BBB not in self.books
        self.triedLoadingBook[BBB] = True

        showProgress = Globals.verbosityLevel > 2 or Globals.debugFlag
        if showProgress:
            progressMessage = _(
                "  USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder)
            print(progressMessage)

        usfmBook = USFMBibleBook(self.name, BBB)
        bookFilename = self.possibleFilenameDict[BBB]
        usfmBook.load(bookFilename, self.sourceFolder, self.encoding)
        # Usually activates InternalBibleBook.processLines()
        usfmBook.validateMarkers()
        return usfmBook

    # end of USFMBible.loadBookMP

    def load(self):
        """
        Load all the books.

        If Globals.maxProcesses is greater than one, the books are loaded
            by a multiprocessing pool (using loadBookMP) and then saved here;
            otherwise they're loaded (and saved) one by one with loadBook.
        self.doPostLoadProcessing() is called once the books are loaded.

        If this object has no maximumPossibleFilenameTuples attribute,
            a critical message is logged and nothing else is done.
        """
        if Globals.verbosityLevel > 1:
            print(
                _("USFMBible: Loading {} from {}...").format(
                    self.name, self.sourceFolder))

        # The filename tuples won't exist if __init__ returned early (no files found)
        #   so this is checked once here for both of the loading paths below
        try:
            loadDetails = self.maximumPossibleFilenameTuples
        except AttributeError:
            logging.critical("USFMBible.load " + _("has nothing to load!"))
            return

        if Globals.maxProcesses > 1:  # Load all the books as quickly as possible
            parameters = [BBB for BBB, filename in loadDetails
                          ]  # Can only pass a single parameter to map
            with multiprocessing.Pool(processes=Globals.maxProcesses
                                      ) as pool:  # start worker processes
                results = pool.map(self.loadBookMP,
                                   parameters)  # have the pool do our loads
                assert (len(results) == len(parameters))
                for bBook in results:
                    self.saveBook(bBook)
        else:  # Just single threaded
            # Load the books one by one -- assuming that they have regular Paratext style filenames
            for BBB, filename in loadDetails:
                self.loadBook(BBB, filename)  # also saves it
        self.doPostLoadProcessing()