def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
    """
    Create the internal ESFM Bible object.

    Parameters:
        sourceFolder: path of the folder that holds the ESFM files.
        givenName: optional name for this Bible. If None, the name is taken
            from the SSF 'FullName' or 'Name' field, then from the folder
            name, and finally defaults to "ESFM Bible".
        givenAbbreviation: optional abbreviation, stored as self.abbreviation.

    NOTE: If the folder contains no files at all, this returns early and the
        attributes that are set after that point (USFMFilenamesObject, name,
        possibleFilenameDict, etc.) are NOT created.
    """
    # Setup and initialise the base class first
    Bible.__init__( self )
    self.objectNameString = "ESFM Bible object"
    self.objectTypeString = "ESFM"

    # Now we can set our object variables
    self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir( self.sourceFolder ):
        somepath = os.path.join( self.sourceFolder, something )
        if os.path.isdir( somepath ): foundFolders.append( something )
        elif os.path.isfile( somepath ): foundFiles.append( something )
        else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )

    # Interlinear_* and __MACOSX subfolders are expected -- anything else is worth a mention
    # (The folder name is compared for equality: the former ('__MACOSX') was just a
    #   parenthesised string, so "in" did a substring test and also skipped e.g. 'MAC'.)
    unexpectedFolders = [folderName for folderName in foundFolders
                            if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
    if unexpectedFolders:
        logging.info( "ESFMBible.load: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, unexpectedFolders ) )
    if not foundFiles:
        if Globals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
        return # No use continuing

    self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
    if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
        print( self.USFMFilenamesObject )

    # Attempt to load the SSF file (both attributes start out as empty, falsy placeholders)
    self.ssfFilepath, self.settingsDict = {}, {}
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
    if len(ssfFilepathList) == 1: # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        self.loadSSFData( self.ssfFilepath )

    # Work out a name: given name, then SSF settings, then folder name, then a default
    self.name = self.givenName
    if self.name is None:
        for field in ('FullName','Name',):
            if field in self.settingsDict:
                self.name = self.settingsDict[field]
                break
    if not self.name: self.name = os.path.basename( self.sourceFolder )
    if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
    if not self.name: self.name = "ESFM Bible"

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = { BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples }

    self.dontLoadBook = []
    self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}
def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
    """
    Create the internal ESFM Bible object.

    Parameters:
        sourceFolder: path of the folder that holds the ESFM files.
        givenName: optional name for this Bible. If None, the name is taken
            from the SSF 'FullName' or 'Name' field, then from the folder
            name, and finally defaults to "ESFM Bible".
        givenAbbreviation: optional abbreviation, stored as self.abbreviation.

    NOTE: If the folder contains no files at all, this returns early and the
        attributes that are set after that point (USFMFilenamesObject, name,
        possibleFilenameDict, etc.) are NOT created.
    """
    # Setup and initialise the base class first
    Bible.__init__( self )
    self.objectNameString = "ESFM Bible object"
    self.objectTypeString = "ESFM"

    # Now we can set our object variables
    self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir( self.sourceFolder ):
        somepath = os.path.join( self.sourceFolder, something )
        if os.path.isdir( somepath ): foundFolders.append( something )
        elif os.path.isfile( somepath ): foundFiles.append( something )
        else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )

    # Interlinear_* and __MACOSX subfolders are expected -- anything else is worth a mention
    # (The folder name is compared for equality: the former ('__MACOSX') was just a
    #   parenthesised string, so "in" did a substring test and also skipped e.g. 'MAC'.)
    unexpectedFolders = [folderName for folderName in foundFolders
                            if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
    if unexpectedFolders:
        logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
    if not foundFiles:
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
        return # No use continuing

    self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
    if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
        print( self.USFMFilenamesObject )

    # Attempt to load the SSF file (both attributes start out as empty, falsy placeholders)
    self.ssfFilepath, self.settingsDict = {}, {}
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
    if len(ssfFilepathList) == 1: # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        self.loadSSFData( self.ssfFilepath )

    # Work out a name: given name, then SSF settings, then folder name, then a default
    self.name = self.givenName
    if self.name is None:
        for field in ('FullName','Name',):
            if field in self.settingsDict:
                self.name = self.settingsDict[field]
                break
    if not self.name: self.name = os.path.basename( self.sourceFolder )
    if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
    if not self.name: self.name = "ESFM Bible"

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = { BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples }

    self.dontLoadBook = []
    self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}
def preload( self ):
    """
    Check the source folder and work out which book files are available,
        without loading any of the books.

    Sets self.USFMFilenamesObject, self.ssfFilepath,
        self.maximumPossibleFilenameTuples and self.possibleFilenameDict,
        adds each book code to self.availableBBBs,
        and finally sets self.preloadDone to True.
    If exactly one SSF file is found, its settings are stored in
        self.suppliedMetadata['PTX7']['SSF'] and applied.

    NOTE: If the folder contains no files at all, this returns early
        and self.preloadDone is NOT set.
    """
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
        print( t("preload() from {}").format( self.sourceFolder ) )

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir( self.sourceFolder ):
        somepath = os.path.join( self.sourceFolder, something )
        if os.path.isdir( somepath ): foundFolders.append( something )
        elif os.path.isfile( somepath ): foundFiles.append( something )
        else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )

    # Interlinear_* and __MACOSX subfolders are expected -- anything else is worth a mention
    # (The folder name is compared for equality: the former ('__MACOSX') was just a
    #   parenthesised string, so "in" did a substring test and also skipped e.g. 'MAC'.)
    unexpectedFolders = [folderName for folderName in foundFolders
                            if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
    if unexpectedFolders:
        logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
    if not foundFiles:
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
        return # No use continuing

    self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
    if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
        print( self.USFMFilenamesObject )

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # Attempt to load the SSF file
    self.ssfFilepath = None
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
    if len(ssfFilepathList) == 1: # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        PTXSettingsDict = loadPTX7ProjectData( self, self.ssfFilepath )
        if PTXSettingsDict:
            self.suppliedMetadata.setdefault( 'PTX7', {} )['SSF'] = PTXSettingsDict
            self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = {}
    for BBB, filename in self.maximumPossibleFilenameTuples:
        self.availableBBBs.add( BBB )
        self.possibleFilenameDict[BBB] = filename

    self.preloadDone = True
class ESFMBible( Bible ):
    """
    Class to load and manipulate ESFM Bibles.
    """
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None ):
        """
        Create the internal ESFM Bible object.

        Parameters:
            sourceFolder: path of the folder that holds the ESFM files.
            givenName: optional name for this Bible. If None, the name is taken
                from the SSF 'FullName' or 'Name' field, then from the folder
                name, and finally defaults to "ESFM Bible".
            givenAbbreviation: optional abbreviation, stored as self.abbreviation.

        NOTE: If the folder contains no files at all, this returns early and the
            attributes that are set after that point (USFMFilenamesObject, name,
            possibleFilenameDict, etc.) are NOT created.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )

        # Interlinear_* and __MACOSX subfolders are expected -- anything else is worth a mention
        # (The folder name is compared for equality: the former ('__MACOSX') was just a
        #   parenthesised string, so "in" did a substring test and also skipped e.g. 'MAC'.)
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' ) and folderName != '__MACOSX']
        if unexpectedFolders:
            logging.info( "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "ESFMBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( self.USFMFilenamesObject )

        # Attempt to load the SSF file (both attributes start out as empty, falsy placeholders)
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )

        # Work out a name: given name, then SSF settings, then folder name, then a default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = { BBB:filename for BBB,filename in self.maximumPossibleFilenameTuples }

        self.dontLoadBook = []
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}
    # end of ESFMBible.__init__


    def loadSSFData( self, ssfFilepath ):
        """
        Process the SSF data from the given filepath.

        Each line between <ScriptureText> and </ScriptureText> is expected
            to hold one complete field, i.e., one of
                <Field>contents</Field>         -> settingsDict['Field'] = 'contents'
                <Field attrs>contents</Field>   -> settingsDict['Field'] = ('contents', 'attrs')
                <Field/> or <Field />           -> settingsDict['Field'] = ''
                <Field attrs />                 -> settingsDict['Field'] = ('', 'attrs')
        Any other non-blank line is reported (printed) as unexpected.

        Doesn't return anything:
            the result is saved in self.ssfDict and a copy of it in self.settingsDict.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading SSF data from {!r}").format( ssfFilepath ) )
        status, settingsDict = 0, {} # status: 0=before <ScriptureText>, 1=inside, 2=after
        with open( ssfFilepath, encoding='utf-8' ) as myFile: # Automatically closes the file when done
            for lineCount, line in enumerate( myFile, start=1 ):
                if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( "ESFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}".format( ssfFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker
                # strip() also removes the trailing newline
                #   (and, unlike indexing line[-1], is safe if only the BOM was on the line)
                line = line.strip()
                if not line: continue # Just discard blank lines
                processed = False
                if status==0 and line=="<ScriptureText>":
                    status = 1
                    processed = True
                elif status==1 and line=="</ScriptureText>":
                    status = 2
                    processed = True
                elif status==1 and line[0]=='<' and line.endswith('/>'): # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[1:-2] # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                    else: # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split( None, 1 )
                        if BibleOrgSysGlobals.debugFlag: assert( len(bits)==2 )
                        fieldname, attributes = bits[0], bits[1]
                        # A self-closing field has no contents
                        #   (this used to save an undefined or left-over "contents" variable here)
                        settingsDict[fieldname] = ('', attributes)
                    processed = True
                elif status==1 and line[0]=='<' and line[-1]=='>':
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1!=-1 and ix2!=-1 and ix2>ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1+1:ix2]
                        closingName = line[ix2+2:-1] # The name from inside the closing tag
                        if ' ' not in fieldname:
                            if closingName==fieldname:
                                settingsDict[fieldname] = contents
                                processed = True
                        else: # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split( None, 1 )
                            if BibleOrgSysGlobals.debugFlag: assert( len(bits)==2 )
                            fieldname, attributes = bits[0], bits[1]
                            if closingName==fieldname:
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                if not processed: print( "ERROR: Unexpected {!r} line in SSF file".format( line ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( " " + _("Got {} SSF entries:").format( len(settingsDict) ) )
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print( " {}: {}".format( key, settingsDict[key] ) )
        self.ssfDict = settingsDict # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy() # This will be all the combined settings
    # end of ESFMBible.loadSSFData


    def loadSemanticDictionary( self, BBB, filename ):
        """
        Load semantic tag entries from the given ESFM file (in self.sourceFolder)
            into self.semanticDict.

        Each 'gl' line of the form "<tag> <content>" (where <tag> is one of the
            ESFM_SEMANTIC_TAGS characters) adds an empty list at
            self.semanticDict[tag][content] if not already there.

        If the file has a 'rem' line starting with 'ESFM ' that doesn't contain ' SEM',
            the method returns immediately (and BBB is NOT added to self.dontLoadBook).
        Otherwise BBB is appended to self.dontLoadBook
            so that the file doesn't later get loaded as a Bible book.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( " " + _("Loading possible semantic dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' SEM' not in originalText: return
            elif marker == 'gl':
                # Check the length BEFORE indexing so that empty or one-character lines can't raise IndexError
                if originalText and len(originalText)>2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict: self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]:
                        self.semanticDict[tagMarker][tagContent] = []
                        count += 1
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} semantic entries added in {} categories".format( count, len(self.semanticDict) ) )
            else: print( "No semantic entries found." )
    # end of ESFMBible.loadSemanticDictionary


    def loadStrongsDictionary( self, BBB, filename ):
        """
        Load Strong's entries from the given ESFM file (in self.sourceFolder)
            into self.StrongsDict.

        A 'gl' line starting with 'H' or 'G' sets the current tag (that first letter)
            and number (the rest of the line).
        A following 'html' line is saved as self.StrongsDict[tag][number]
            if there's no entry there already.

        If the file has a 'rem' line starting with 'ESFM ' that doesn't contain ' STR',
            the method returns immediately (and BBB is NOT added to self.dontLoadBook).
        Otherwise BBB is appended to self.dontLoadBook
            so that the file doesn't later get loaded as a Bible book.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( " " + _("Loading possible Strong's dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        tagMarker = sNumber = None # Nothing is current until we've had a valid 'gl' line
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' STR' not in originalText: return
            elif marker == 'gl':
                if originalText and originalText[0] in 'HG': # An empty line used to raise IndexError here
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None: # This used to raise an unbound variable error
                    logging.warning( "ESFMBible.loadStrongsDictionary: Ignored html line before any Strong's number in {}".format( filename ) )
                    continue
                dictEntry = originalText
                if tagMarker not in self.StrongsDict: self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]:
                    self.StrongsDict[tagMarker][sNumber] = dictEntry
                    count += 1
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} Strong's entries added in {} categories".format( count, len(self.StrongsDict) ) )
            else: print( "No Strong's entries found." )
    # end of ESFMBible.loadStrongsDictionary


    def loadDictionaries( self ):
        """
        Attempts to load the semantic (book code XXD) and Strong's (book code XXE)
            dictionaries if files for them exist.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( " " + _("Loading any dictionaries...") )
        for BBB,filename in self.maximumPossibleFilenameTuples:
            if BBB=='XXD': self.loadSemanticDictionary( BBB, filename )
            elif BBB=='XXE': self.loadStrongsDictionary( BBB, filename )
    # end of ESFMBible.loadDictionaries


    def loadBook( self, BBB, filename=None ):
        """
        Load the requested book if it's not already loaded.

        Does nothing if the book is already in self.books, is listed in self.dontLoadBook,
            or if loading it has already been attempted (which also logs a warning).
        If filename is None, it's looked up in self.possibleFilenameDict.
        A book that ends up with no raw lines is logged, but not validated or saved.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBible.loadBook( {}, {} )".format( BBB, filename ) )
        if BBB in self.books: return # Already loaded
        if BBB in self.dontLoadBook: return # Must be a dictionary that's already loaded
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading ESFM {} for {}".format( BBB, self.name ) )
            return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            try: print( _(" ESFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
            except UnicodeEncodeError: print( _(" ESFMBible: Loading {}...").format( BBB ) )
        if filename is None: filename = self.possibleFilenameDict[BBB]
        EBB = ESFMBibleBook( self, BBB )
        EBB.load( filename, self.sourceFolder )
        if EBB._rawLines:
            EBB.validateMarkers() # Usually activates InternalBibleBook.processLines()
            self.saveBook( EBB )
        else: logging.info( "ESFM book {} was completely blank".format( BBB ) )
    # end of ESFMBible.loadBook


    def _loadBookMP( self, BBB_Filename ):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded
            (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.
            (The filename that's actually used is the one from self.possibleFilenameDict.)

        Returns the loaded book object,
            or None if the book is listed in self.dontLoadBook
            or (like in loadBook) if the file was completely blank.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "ESFMBible.loadBookMP( {} )".format( BBB_Filename ) )
        BBB, filename = BBB_Filename
        assert( BBB not in self.books )
        if BBB in self.dontLoadBook: return None
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print( _(" ESFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
        EBB = ESFMBibleBook( self, BBB )
        EBB.load( self.possibleFilenameDict[BBB], self.sourceFolder )
        if not EBB._rawLines: # Treat blank books the same as loadBook does
            logging.info( "ESFM book {} was completely blank".format( BBB ) )
            return None
        EBB.validateMarkers() # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag: print( _(" Finishing loading ESFM book {}.").format( BBB ) )
        return EBB
    # end of ESFMBible.loadBookMP


    def load( self ):
        """
        Load all the books.

        Any dictionaries are loaded first (see loadDictionaries),
            then the books are loaded either in a multiprocessing pool
            (if BibleOrgSysGlobals.maxProcesses > 1) or else one by one.
        Finishes by calling self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("ESFMBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if self.maximumPossibleFilenameTuples:
            # First try to load the dictionaries
            self.loadDictionaries()
            # Now load the books
            if BibleOrgSysGlobals.maxProcesses > 1: # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print( _("ESFMBible: Loading {} books using {} CPUs...").format( len(self.maximumPossibleFilenameTuples), BibleOrgSysGlobals.maxProcesses ) )
                    print( " NOTE: Outputs (including error and warning messages) from loading various books may be interspersed." )
                with multiprocessing.Pool( processes=BibleOrgSysGlobals.maxProcesses ) as pool: # start worker processes
                    results = pool.map( self._loadBookMP, self.maximumPossibleFilenameTuples ) # have the pool do our loads
                    assert( len(results) == len(self.maximumPossibleFilenameTuples) )
                    for bBook in results:
                        if bBook is not None: self.saveBook( bBook ) # Saves them in the correct order
            else: # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB,filename in self.maximumPossibleFilenameTuples:
                    if BBB not in self.dontLoadBook:
                        self.loadBook( BBB, filename ) # also saves it
        else:
            logging.critical( _("ESFMBible: No books to load in {}!").format( self.sourceFolder ) )
        if 'Tag errors' in self.semanticDict: print( "Tag errors:", self.semanticDict['Tag errors'] )
        if 'Missing' in self.semanticDict: print( "Missing:", self.semanticDict['Missing'] )
        self.doPostLoadProcessing()
def preload(self):
    """
    Check the source folder and work out which book files are available,
    without loading any of the books.

    Sets self.USFMFilenamesObject, self.ssfFilepath,
    self.maximumPossibleFilenameTuples and self.possibleFilenameDict, adds
    each book code to self.availableBBBs, and finally sets self.preloadDone
    to True. If exactly one SSF file is found, its settings are stored in
    self.suppliedMetadata['PTX7']['SSF'] and applied.

    NOTE: If the folder contains no files at all, this returns early and
    self.preloadDone is NOT set.
    """
    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        print(t("preload() from {}").format(self.sourceFolder))

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir(self.sourceFolder):
        somepath = os.path.join(self.sourceFolder, something)
        if os.path.isdir(somepath):
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            foundFiles.append(something)
        else:
            logging.error("Not sure what {!r} is in {}!".format(
                somepath, self.sourceFolder))

    # Interlinear_* and __MACOSX subfolders are expected -- anything else is
    # worth a mention. The folder name is compared for equality: the former
    # ('__MACOSX') was just a parenthesised string, so "in" did a substring
    # test and also skipped e.g. 'MAC'.
    unexpectedFolders = [
        folderName for folderName in foundFolders
        if not folderName.startswith('Interlinear_')
        and folderName != '__MACOSX'
    ]
    if unexpectedFolders:
        logging.info(
            "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format(
                self.sourceFolder, unexpectedFolders))
    if not foundFiles:
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print("ESFMBible: Couldn't find any files in {!r}".format(
                self.sourceFolder))
        return  # No use continuing

    self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
    if BibleOrgSysGlobals.verbosityLevel > 3 or (
            BibleOrgSysGlobals.debugFlag and debuggingThisModule):
        print(self.USFMFilenamesObject)

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Attempt to load the SSF file
    self.ssfFilepath = None
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
        searchAbove=True, auto=True)
    if len(ssfFilepathList) == 1:  # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        PTXSettingsDict = loadPTX7ProjectData(self, self.ssfFilepath)
        if PTXSettingsDict:
            self.suppliedMetadata.setdefault('PTX7', {})['SSF'] = PTXSettingsDict
            # Copy some to BibleObject.settingsDict
            self.applySuppliedMetadata('SSF')

    # Find the filenames of all our books
    # (getMaximumPossibleFilenameTuples returns (BBB,filename) 2-tuples)
    self.maximumPossibleFilenameTuples = \
        self.USFMFilenamesObject.getMaximumPossibleFilenameTuples()
    self.possibleFilenameDict = {}
    for BBB, filename in self.maximumPossibleFilenameTuples:
        self.availableBBBs.add(BBB)
        self.possibleFilenameDict[BBB] = filename

    self.preloadDone = True
def ESFMBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Given a folder, search for ESFM Bible files or folders in the folder
    and in the next level down.

    Parameters:
        givenFolderName: path of the folder to search.
        strictCheck: passed on to USFMBibleFileCheck and to
            USFMFilenames.getMaximumPossibleFilenameTuples.
        autoLoad: if true and exactly one ESFM Bible is found, create and
            return the ESFMBible object for it.
        autoLoadBooks: like autoLoad, but also call load() on the new object.

    Returns:
        False if the folder is unreadable, is not a folder, or fails the
            USFMBibleFileCheck.
        An ESFMBible object if exactly one Bible was found and autoLoad or
            autoLoadBooks is true.
        Otherwise the number of Bibles found, or None if none were found.
    """
    if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
        print("ESFMBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False) and autoLoadBooks in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Check that there's a USFM Bible here first
    from USFMBible import USFMBibleFileCheck
    if not USFMBibleFileCheck(givenFolderName, strictCheck,
                              discountSSF=False):  # no autoloads
        return False

    # Find all the subfolders in this folder (but don't visit __MACOSX)
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" ESFMBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders = [
        something for something in os.listdir(givenFolderName)
        if os.path.isdir(os.path.join(givenFolderName, something))
        and something != '__MACOSX'
    ]

    # See if there's an ESFMBible project here in this given folder
    numFound = 0
    UFns = USFMFilenames(
        givenFolderName)  # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(UFns)
    filenameTuples = UFns.getMaximumPossibleFilenameTuples(
        strictCheck=strictCheck)  # Returns (BBB,filename) 2-tuples
    # Only accept our specific file extensions
    # (slice assignment keeps it an in-place edit of the returned list)
    filenameTuples[:] = [
        (BBB, fn) for BBB, fn in filenameTuples
        if any(fn.endswith(fna) for fna in filenameEndingsToAccept)
    ]
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" Confirmed:", len(filenameTuples), filenameTuples)
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
        print(" Found {} ESFM file{}.".format(
            len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
    if filenameTuples:
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
            print("Got ESFM SSFs: ({}) {}".format(len(SSFs), SSFs))
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck got", numFound, givenFolderName)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible(givenFolderName)
            if autoLoadBooks:
                eB.load()  # Load and process the file
            return eB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames(
            tryFolderName)  # Assuming they have standard Paratext style filenames
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(UFns)
        filenameTuples = UFns.getMaximumPossibleFilenameTuples(
            strictCheck=strictCheck)  # Returns (BBB,filename) 2-tuples
        # Only accept our specific file extensions
        filenameTuples[:] = [
            (BBB, fn) for BBB, fn in filenameTuples
            if any(fn.endswith(fna) for fna in filenameEndingsToAccept)
        ]
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" Confirmed:", len(filenameTuples), filenameTuples)
        if BibleOrgSysGlobals.verbosityLevel > 2 and filenameTuples:
            print(" Found {} ESFM files: {}".format(len(filenameTuples),
                                                    filenameTuples))
        elif BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
            print(" Found {} ESFM file{}".format(
                len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
        if filenameTuples:
            SSFs = UFns.getSSFFilenames(searchAbove=True)
            if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
                print("Got ESFM SSFs: ({}) {}".format(len(SSFs), SSFs))
            foundProjects.append(tryFolderName)
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible(foundProjects[0])
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
def ESFMBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Returns False if the given folder is unreadable, is not a folder,
        or if the preliminary USFMBibleFileCheck returns a false value.

    If neither autoLoad nor autoLoadBooks is set (the default),
        returns None if nothing was found, or else the number of Bibles found.

    If autoLoad or autoLoadBooks is set and exactly one ESFM Bible is found,
        returns the ESFMBible object (with its books loaded if autoLoadBooks is set).
    """
    if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
        print( "ESFMBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
        assert givenFolderName and isinstance( givenFolderName, str )
        assert autoLoad in (True,False,) and autoLoadBooks in (True,False)

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Check that there's a USFM Bible here first
    from USFMBible import USFMBibleFileCheck # Imported here (as in the original) rather than at module level
    if not USFMBibleFileCheck( givenFolderName, strictCheck, discountSSF=False ): # no autoloads
        return False

    # Any filename must end with one of these to be accepted
    acceptedEndings = tuple( filenameEndingsToAccept )

    # Find all the folders in this folder (they're only needed if we have to look one level down)
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( " ESFMBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders = []
    for something in os.listdir( givenFolderName ):
        somepath = os.path.join( givenFolderName, something )
        if os.path.isdir( somepath ):
            if something == '__MACOSX': continue # don't visit these directories
            foundFolders.append( something )

    # See if there's an ESFMBible project here in this given folder
    numFound = 0
    UFns = USFMFilenames( givenFolderName ) # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
    filenameTuples = UFns.getMaximumPossibleFilenameTuples( strictCheck=strictCheck ) # Returns (BBB,filename) 2-tuples
    # Only accept our specific file extensions (filtered in place, like the original remove() calls)
    filenameTuples[:] = [(BBB,fn) for BBB,fn in filenameTuples if fn.endswith( acceptedEndings )]
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( " Confirmed:", len(filenameTuples), filenameTuples )
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
        print( " Found {} ESFM file{}.".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
    if filenameTuples:
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
            print( "Got ESFM SSFs: ({}) {}".format( len(SSFs), SSFs ) )
        numFound += 1 # The SSF file is only reported -- it isn't required
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "ESFMBibleFileCheck got", numFound, givenFolderName )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible( givenFolderName )
            if autoLoadBooks: eB.load() # Load and process the file
            return eB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue

        # See if there's an ESFM Bible here in this folder
        UFns = USFMFilenames( tryFolderName ) # Assuming they have standard Paratext style filenames
        if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
        filenameTuples = UFns.getMaximumPossibleFilenameTuples( strictCheck=strictCheck ) # Returns (BBB,filename) 2-tuples
        # Only accept our specific file extensions (filtered in place, like the original remove() calls)
        filenameTuples[:] = [(BBB,fn) for BBB,fn in filenameTuples if fn.endswith( acceptedEndings )]
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( " Confirmed:", len(filenameTuples), filenameTuples )
        if BibleOrgSysGlobals.verbosityLevel > 2 and filenameTuples:
            print( " Found {} ESFM files: {}".format( len(filenameTuples), filenameTuples ) )
        elif BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
            print( " Found {} ESFM file{}".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
        if filenameTuples:
            SSFs = UFns.getSSFFilenames( searchAbove=True )
            if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Got ESFM SSFs: ({}) {}".format( len(SSFs), SSFs ) )
            foundProjects.append( tryFolderName )
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "ESFMBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible( foundProjects[0] )
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
    # Falls through (returning None) if nothing was found at either level
def __init__(self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None):
    """
    Create the internal USFM Bible object.

    Lists sourceFolder, tries to load a single SSF settings file
        (which may also supply the file encoding if none was given),
        chooses a name for the Bible, and records the possible
        (BBB,filename) pairs for the books.

    Raises FileNotFoundError if sourceFolder contains no files.
    """
    # Setup and initialise the base class first
    Bible.__init__(self)
    self.objectNameString = "USFM Bible object"
    self.objectTypeString = "USFM"

    # Now we can set our object variables
    self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir(self.sourceFolder):
        somepath = os.path.join(self.sourceFolder, something)
        if os.path.isdir(somepath):
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            foundFiles.append(something)
        else:
            logging.error(
                t("__init__: Not sure what '{}' is in {}!").format(
                    somepath, self.sourceFolder))
    if foundFolders:
        unexpectedFolders = []
        for folderName in foundFolders:
            if folderName.startswith('Interlinear_'):
                continue
            # Must be a real tuple: ('__MACOSX') is just a string,
            #   which would make this a substring test
            if folderName in ('__MACOSX',):
                continue
            unexpectedFolders.append(folderName)
        if unexpectedFolders:
            logging.info(
                t("__init__: Surprised to see subfolders in '{}': {}").format(
                    self.sourceFolder, unexpectedFolders))
    if not foundFiles:
        if Globals.verbosityLevel > 0:
            print(
                t("__init__: Couldn't find any files in '{}'").format(
                    self.sourceFolder))
        raise FileNotFoundError  # No use continuing

    self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
    if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
        print("USFMFilenamesObject", self.USFMFilenamesObject)

    # Attempt to load the SSF file
    self.ssfFilepath, self.settingsDict = {}, {}
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
        searchAbove=True, auto=True)
    if len(ssfFilepathList) == 1:  # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        self.loadSSFData(self.ssfFilepath)
        if self.encoding is None and 'Encoding' in self.ssfDict:
            # See if the SSF file gives some help to us
            ssfEncoding = self.ssfDict['Encoding']
            if ssfEncoding == '65001':
                self.encoding = 'utf-8'
            else:
                if Globals.verbosityLevel > 0:
                    print(
                        t("__init__: File encoding in SSF is set to '{}'").format(
                            ssfEncoding))
                if ssfEncoding.isdigit():
                    # Other numeric values are used as the number of a 'cpNNN' codec
                    self.encoding = 'cp' + ssfEncoding
                    if Globals.verbosityLevel > 0:
                        print(
                            t("__init__: Switched to '{}' file encoding").format(
                                self.encoding))
                else:
                    logging.critical(
                        t("__init__: Unsure how to handle '{}' file encoding").format(
                            ssfEncoding))

    # Choose a name: given name, then SSF settings, then the folder name, then a default
    self.name = self.givenName
    if self.name is None:
        for field in ('FullName', 'Name',):
            if field in self.settingsDict:
                self.name = self.settingsDict[field]
                break
    if not self.name:
        self.name = os.path.basename(self.sourceFolder)
    if not self.name:
        self.name = os.path.basename(self.sourceFolder[:-1])  # Remove the final slash
    if not self.name:
        self.name = "USFM Bible"

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples()  # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = {}
    for BBB, filename in self.maximumPossibleFilenameTuples:
        self.possibleFilenameDict[BBB] = filename
class ESFMBible(Bible):
    """
    Class to load and manipulate ESFM Bibles.
    """

    def __init__(self, sourceFolder, givenName=None, givenAbbreviation=None):
        """
        Create the internal ESFM Bible object.

        Lists sourceFolder, tries to load a single SSF settings file,
            chooses a name for the Bible, and records the possible
            (BBB,filename) pairs for the books.

        NOTE: If sourceFolder contains no files, this returns early and the
            filename and dictionary attributes set further down never get set.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "ESFM Bible object"
        self.objectTypeString = "ESFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation = sourceFolder, givenName, givenAbbreviation

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath):
                foundFolders.append(something)
            elif os.path.isfile(somepath):
                foundFiles.append(something)
            else:
                logging.error("Not sure what {!r} is in {}!".format(
                    somepath, self.sourceFolder))
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName.startswith('Interlinear_'):
                    continue
                # Must be a real tuple: ('__MACOSX') is just a string,
                #   which would make this a substring test
                if folderName in ('__MACOSX',):
                    continue
                unexpectedFolders.append(folderName)
            if unexpectedFolders:
                logging.info(
                    "ESFMBible.load: Surprised to see subfolders in {!r}: {}".format(
                        self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("ESFMBible: Couldn't find any files in {!r}".format(
                    self.sourceFolder))
            return  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print(self.USFMFilenamesObject)

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData(self.ssfFilepath)

        # Choose a name: given name, then SSF settings, then the folder name, then a default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name',):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name:
            self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(self.sourceFolder[:-1])  # Remove the final slash
        if not self.name:
            self.name = "ESFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples()  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename

        self.dontLoadBook = []
        self.spellingDict, self.StrongsDict, self.hyphenationDict, self.semanticDict = {}, {}, {}, {}
    # end of ESFMBible.__init_

    def loadSSFData(self, ssfFilepath):
        """
        Process the SSF data from the given filepath (read as UTF-8).

        Only lines between <ScriptureText> and </ScriptureText> are accepted.
        Each field is saved under its name as a string, or as a
            (contents, attributes) 2-tuple if the opening tag had attributes.
        Lines which can't be handled are reported with print().

        Saves the result in self.ssfDict and a copy of it in self.settingsDict.
        Doesn't return anything.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading SSF data from {!r}").format(ssfFilepath))
        lineCount, status, settingsDict = 0, 0, {}
        # status: 0 = before <ScriptureText>, 1 = inside it, 2 = after </ScriptureText>
        with open(ssfFilepath, encoding='utf-8') as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "ESFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                        .format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # strip() also removes the trailing newline, and (unlike
                #   indexing line[-1]) is safe if only the BOM was on the line
                line = line.strip()
                if not line:
                    continue  # Just discard blank lines
                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 2
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith('/>'):
                    # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    elif ' ' in fieldname:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if BibleOrgSysGlobals.debugFlag:
                            assert (len(bits) == 2)
                        fieldname = bits[0]
                        attributes = bits[1]
                        # A self-closing field has no contents (the original code
                        #   used 'contents' here, which was unset or left over
                        #   from an earlier line)
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1 != -1 and ix2 != -1 and ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        if ' ' not in fieldname and line[ix2 + 2:-1] == fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if BibleOrgSysGlobals.debugFlag:
                                assert (len(bits) == 2)
                            fieldname = bits[0]
                            attributes = bits[1]
                            # The closing tag must match the name without its attributes
                            if line[ix2 + 2:-1] == fieldname:
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                if not processed:
                    print("ERROR: Unexpected {!r} line in SSF file".format(line))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" " + _("Got {} SSF entries:").format(len(settingsDict)))
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print(" {}: {}".format(key, settingsDict[key]))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy()  # This will be all the combined settings
    # end of ESFMBible.loadSSFData

    def loadSemanticDictionary(self, BBB, filename):
        """
        Read the given ESFM file from the source folder
            and add its 'gl' entries to self.semanticDict.

        A 'gl' entry is used if it starts with one of the ESFM_SEMANTIC_TAGS
            followed by a space and then some content. Each new content string
            is saved as a key (with an empty list) under its tag character.

        Gives up (without marking the book) if a 'rem' line starting with
            'ESFM ' doesn't contain ' SEM'. Otherwise BBB is appended to
            self.dontLoadBook so that it doesn't get loaded as a Bible book.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " + _("Loading possible semantic dictionary from {}...").format(
                filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        for marker, originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' SEM' not in originalText:
                    return
            elif marker == 'gl':
                # Check the length first so that a short line can't cause an IndexError
                if len(originalText) > 2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict:
                        self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]:
                        self.semanticDict[tagMarker][tagContent] = []
                        count += 1
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} semantic entries added in {} categories".format(
                    count, len(self.semanticDict)))
            else:
                print("No semantic entries found.")
    # end of ESFMBible.loadSemanticDictionary

    def loadStrongsDictionary(self, BBB, filename):
        """
        Read the given ESFM file from the source folder
            and add its entries to self.StrongsDict.

        A 'gl' line starting with H or G sets the current tag and number.
        Each following 'html' line is saved under that tag and number
            (unless there's already an entry there).

        Gives up (without marking the book) if a 'rem' line starting with
            'ESFM ' doesn't contain ' STR'. Otherwise BBB is appended to
            self.dontLoadBook so that it doesn't get loaded as a Bible book.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " + _("Loading possible Strong's dictionary from {}...").format(
                filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        tagMarker = sNumber = None  # Get set by a 'gl' line for the 'html' line(s) that follow
        for marker, originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' STR' not in originalText:
                    return
            elif marker == 'gl':
                # The truth test stops an empty line from causing an IndexError
                if originalText and originalText[0] in 'HG':
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None:
                    # We can't file this entry without a preceding H/G 'gl' line
                    #   (the original code failed with an unbound variable here)
                    logging.warning(
                        "ESFMBible.loadStrongsDictionary: Ignored html entry with no preceding gl line in {}"
                        .format(filename))
                    continue
                dictEntry = originalText
                if tagMarker not in self.StrongsDict:
                    self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]:
                    self.StrongsDict[tagMarker][sNumber] = dictEntry
                    count += 1
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} Strong's entries added in {} categories".format(
                    count, len(self.StrongsDict)))
            else:
                print("No Strong's entries found.")
    # end of ESFMBible.loadStrongsDictionary

    def loadDictionaries(self):
        """
        Attempts to load the semantic and Strong's dictionaries if they exist
            (i.e., if there are XXD and XXE entries among the possible filenames).
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " + _("Loading any dictionaries..."))
        for BBB, filename in self.maximumPossibleFilenameTuples:
            if BBB == 'XXD':
                self.loadSemanticDictionary(BBB, filename)
            elif BBB == 'XXE':
                self.loadStrongsDictionary(BBB, filename)
    # end of ESFMBible.loadDictionaries

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book if it's not already loaded.

        Does nothing if the book is already loaded, is listed in
            self.dontLoadBook, or if we have already tried to load it.
        If no filename is given, it's looked up in self.possibleFilenameDict.
        The book is only saved if it wasn't completely blank.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBible.loadBook( {}, {} )".format(BBB, filename))
        if BBB in self.books:
            return  # Already loaded
        if BBB in self.dontLoadBook:
            return  # Must be a dictionary that's already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading ESFM {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            try:
                print(
                    _(" ESFMBible: Loading {} from {} from {}...").format(
                        BBB, self.name, self.sourceFolder))
            except UnicodeEncodeError:  # Fall back to a message without the name and folder
                print(_(" ESFMBible: Loading {}...").format(BBB))
        if filename is None:
            filename = self.possibleFilenameDict[BBB]
        EBB = ESFMBibleBook(self, BBB)
        EBB.load(filename, self.sourceFolder)
        if EBB._rawLines:
            EBB.validateMarkers()  # Usually activates InternalBibleBook.processLines()
            self.saveBook(EBB)
        else:
            logging.info("ESFM book {} was completely blank".format(BBB))
    # end of ESFMBible.loadBook

    def _loadBookMP(self, BBB_Filename):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded
            (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.
            (The book is actually loaded from the filename
            in self.possibleFilenameDict.)

        Returns the loaded book, or None if BBB is in self.dontLoadBook.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("ESFMBible.loadBookMP( {} )".format(BBB_Filename))
        BBB, filename = BBB_Filename
        assert (BBB not in self.books)
        if BBB in self.dontLoadBook:
            return None
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _(" ESFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        EBB = ESFMBibleBook(self, BBB)
        EBB.load(self.possibleFilenameDict[BBB], self.sourceFolder)
        EBB.validateMarkers()  # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_(" Finishing loading ESFM book {}.").format(BBB))
        return EBB
    # end of ESFMBible.loadBookMP

    def load(self):
        """
        Load the dictionaries and then all the books.

        Uses a multiprocessing pool if BibleOrgSysGlobals.maxProcesses > 1,
            otherwise loads the books one by one.
        Always finishes by calling self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                _("ESFMBible: Loading {} from {}...").format(
                    self.name, self.sourceFolder))

        if self.maximumPossibleFilenameTuples:
            # First try to load the dictionaries
            self.loadDictionaries()
            # Now load the books
            if BibleOrgSysGlobals.maxProcesses > 1:  # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        _("ESFMBible: Loading {} books using {} CPUs...").format(
                            len(self.maximumPossibleFilenameTuples),
                            BibleOrgSysGlobals.maxProcesses))
                    print(
                        " NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                    )
                with multiprocessing.Pool(
                        processes=BibleOrgSysGlobals.maxProcesses
                ) as pool:  # start worker processes
                    # Can only pass a single parameter to map, hence the (BBB,filename) tuples
                    results = pool.map(self._loadBookMP,
                                       self.maximumPossibleFilenameTuples)  # have the pool do our loads
                    assert (len(results) == len(
                        self.maximumPossibleFilenameTuples))
                    for bBook in results:
                        if bBook is not None:  # None means that the book was in dontLoadBook
                            self.saveBook(bBook)  # Saves them in the correct order
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    if BBB not in self.dontLoadBook:
                        self.loadBook(BBB, filename)  # also saves it
        else:
            logging.critical(
                _("ESFMBible: No books to load in {}!").format(
                    self.sourceFolder))
        if 'Tag errors' in self.semanticDict:
            print("Tag errors:", self.semanticDict['Tag errors'])
        if 'Missing' in self.semanticDict:
            print("Missing:", self.semanticDict['Missing'])
        self.doPostLoadProcessing()
class USFMBible( Bible ):
    """
    Class to load and manipulate USFM Bibles.
    """
    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
        """
        Create the internal USFM Bible object.

        Lists sourceFolder, tries to load a single SSF settings file
            (which may also supply the file encoding if none was given),
            chooses a name for the Bible, and records the possible
            (BBB,filename) pairs for the books.

        Raises FileNotFoundError if sourceFolder contains no files.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ): foundFiles.append( something )
            else: logging.error( t("__init__: Not sure what '{}' is in {}!").format( somepath, self.sourceFolder ) )
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName.startswith( 'Interlinear_'): continue
                # Must be a real tuple: ('__MACOSX') is just a string,
                #   which would make this a substring test
                if folderName in ('__MACOSX',): continue
                unexpectedFolders.append( folderName )
            if unexpectedFolders:
                logging.info( t("__init__: Surprised to see subfolders in '{}': {}").format( self.sourceFolder, unexpectedFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( t("__init__: Couldn't find any files in '{}'").format( self.sourceFolder ) )
            raise FileNotFoundError # No use continuing

        self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
        if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfFilepathList) == 1: # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData( self.ssfFilepath )
            if self.encoding is None and 'Encoding' in self.ssfDict: # See if the SSF file gives some help to us
                ssfEncoding = self.ssfDict['Encoding']
                if ssfEncoding == '65001': self.encoding = 'utf-8'
                else:
                    if Globals.verbosityLevel > 0:
                        print( t("__init__: File encoding in SSF is set to '{}'").format( ssfEncoding ) )
                    if ssfEncoding.isdigit():
                        # Other numeric values are used as the number of a 'cpNNN' codec
                        self.encoding = 'cp' + ssfEncoding
                        if Globals.verbosityLevel > 0:
                            print( t("__init__: Switched to '{}' file encoding").format( self.encoding ) )
                    else:
                        logging.critical( t("__init__: Unsure how to handle '{}' file encoding").format( ssfEncoding ) )

        # Choose a name: given name, then SSF settings, then the folder name, then a default
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName','Name',):
                if field in self.settingsDict: self.name = self.settingsDict[field]; break
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
    # end of USFMBible.__init_


    def loadSSFData( self, ssfFilepath, encoding=None ):
        """
        Process the SSF data from the given filepath
            (read as UTF-8 unless another encoding is given).

        Only lines between <ScriptureText> and </ScriptureText> are accepted.
        Each field is saved under its name as a string, or as a
            (contents, attributes) 2-tuple if the opening tag had attributes.
        Lines which can't be handled are reported with print().

        Saves the result in self.ssfDict and a copy of it in self.settingsDict.
        Doesn't return anything.
        """
        if Globals.verbosityLevel > 2: print( t("Loading SSF data from '{}' ({})").format( ssfFilepath, encoding ) )
        if encoding is None: encoding = 'utf-8'
        lineCount, status, settingsDict = 0, 0, {}
        # status: 0 = before <ScriptureText>, 1 = inside it, 2 = after </ScriptureText>
        with open( ssfFilepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( t("loadSSFData: Detected UTF-16 Byte Order Marker in {}").format( ssfFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker
                # strip() also removes the trailing newline, and (unlike
                #   indexing line[-1]) is safe if only the BOM was on the line
                line = line.strip()
                if not line: continue # Just discard blank lines
                processed = False
                if status==0 and line=="<ScriptureText>":
                    status = 1
                    processed = True
                elif status==1 and line=="</ScriptureText>":
                    status = 2
                    processed = True
                elif status==1 and line[0]=='<' and line.endswith('/>'): # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(' />') else line[1:-2] # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    elif ' ' in fieldname: # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split( None, 1 )
                        if Globals.debugFlag: assert( len(bits)==2 )
                        fieldname = bits[0]
                        attributes = bits[1]
                        # A self-closing field has no contents (the original code
                        #   used 'contents' here, which was unset or left over
                        #   from an earlier line)
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status==1 and line[0]=='<' and line[-1]=='>':
                    # Use find() not index() -- index() raises ValueError
                    #   instead of returning the -1 that's checked for below
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1!=-1 and ix2!=-1 and ix2>ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1+1:ix2]
                        if ' ' not in fieldname and line[ix2+2:-1]==fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname: # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split( None, 1 )
                            if Globals.debugFlag: assert( len(bits)==2 )
                            fieldname = bits[0]
                            attributes = bits[1]
                            # The closing tag must match the name without its attributes
                            if line[ix2+2:-1]==fieldname:
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                if not processed: print( t("ERROR: Unexpected '{}' line in SSF file").format( line ) )
        if Globals.verbosityLevel > 2:
            print( " " + t("Got {} SSF entries:").format( len(settingsDict) ) )
            if Globals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    try: print( " {}: {}".format( key, settingsDict[key] ) )
                    except UnicodeEncodeError: print( " {}: UNICODE ENCODING ERROR".format( key ) )
        self.ssfDict = settingsDict # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy() # This will be all the combined settings
    # end of USFMBible.loadSSFData


    def loadBook( self, BBB, filename=None ):
        """
        Load the requested book if it's not already loaded.

        Does nothing if the book is already loaded
            or if we have already tried to load it.
        If no filename is given, it's looked up in self.possibleFilenameDict.
        The book is only saved if it wasn't completely blank.

        Raises FileNotFoundError if no filename can be determined.
        """
        if Globals.verbosityLevel > 2: print( "USFMBible.loadBook( {}, {} )".format( BBB, filename ) )
        if BBB in self.books: return # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading USFM {} for {}".format( BBB, self.name ) )
            return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            print( _(" USFMBible: Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
        if filename is None and BBB in self.possibleFilenameDict: filename = self.possibleFilenameDict[BBB]
        if filename is None: raise FileNotFoundError( "USFMBible.loadBook: Unable to find file for {}".format( BBB ) )
        UBB = USFMBibleBook( self, BBB )
        UBB.load( filename, self.sourceFolder, self.encoding )
        if UBB._rawLines:
            UBB.validateMarkers() # Usually activates InternalBibleBook.processLines()
            self.saveBook( UBB )
        else: logging.info( "USFM book {} was completely blank".format( BBB ) )
    # end of USFMBible.loadBook


    def _loadBookMP( self, BBB_Filename ):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded
            (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.
            (The book is actually loaded from the filename
            in self.possibleFilenameDict.)

        Returns the loaded book.
        """
        if Globals.verbosityLevel > 3: print( t("loadBookMP( {} )").format( BBB_Filename ) )
        BBB, filename = BBB_Filename
        assert( BBB not in self.books )
        self.triedLoadingBook[BBB] = True
        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            print( ' ' + t("Loading {} from {} from {}...").format( BBB, self.name, self.sourceFolder ) )
        UBB = USFMBibleBook( self, BBB )
        UBB.load( self.possibleFilenameDict[BBB], self.sourceFolder, self.encoding )
        UBB.validateMarkers() # Usually activates InternalBibleBook.processLines()
        if Globals.verbosityLevel > 2 or Globals.debugFlag: print( _(" Finishing loading USFM book {}.").format( BBB ) )
        return UBB
    # end of USFMBible.loadBookMP


    def load( self ):
        """
        Load all the books.

        Uses a multiprocessing pool if Globals.maxProcesses > 1,
            otherwise loads the books one by one.
        Always finishes by calling self.doPostLoadProcessing().
        """
        if Globals.verbosityLevel > 1: print( t("Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if self.maximumPossibleFilenameTuples:
            if Globals.maxProcesses > 1: # Load all the books as quickly as possible
                if Globals.verbosityLevel > 1:
                    print( t("Loading {} books using {} CPUs...").format( len(self.maximumPossibleFilenameTuples), Globals.maxProcesses ) )
                    print( " NOTE: Outputs (including error and warning messages) from loading various books may be interspersed." )
                with multiprocessing.Pool( processes=Globals.maxProcesses ) as pool: # start worker processes
                    # Can only pass a single parameter to map, hence the (BBB,filename) tuples
                    results = pool.map( self._loadBookMP, self.maximumPossibleFilenameTuples ) # have the pool do our loads
                    assert( len(results) == len(self.maximumPossibleFilenameTuples) )
                    for bBook in results: self.saveBook( bBook ) # Saves them in the correct order
            else: # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB,filename in self.maximumPossibleFilenameTuples:
                    self.loadBook( BBB, filename ) # also saves it
        else:
            logging.critical( t("No books to load in {}!").format( self.sourceFolder ) )
        self.doPostLoadProcessing()
def preload( self ):
    """
    Inspect self.sourceFolder, pick up Paratext SSF settings if any, and index the book files.

    In order, this:
        * sorts the folder entries into files and subfolders, logging an error for
            anything that is neither, and an info message for any subfolder other
            than 'Interlinear_*' or '__MACOSX';
        * raises FileNotFoundError if the folder holds no files at all;
        * creates self.USFMFilenamesObject for the folder;
        * if self.ssfFilepath is still None, asks the filenames object for SSF
            candidates and hands the first one to loadPTX7ProjectData() -- a non-empty
            result is stored at self.suppliedMetadata['PTX']['SSF'] and then
            self.applySuppliedMetadata( 'SSF' ) is called;
        * fills self.maximumPossibleFilenameTuples and self.possibleFilenameDict;
        * sets self.preloadDone to True.
    """
    folderPath = self.sourceFolder

    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        print( exp("preload() from {}").format( folderPath ) )
        # NOTE(review): the source this was rebuilt from had lost its indentation;
        #   these checks are assumed to belong to the debug/verbose branch -- confirm.
        assert not self.preloadDone
        assert folderPath is not None

    # Classify everything that lives directly in the folder
    fileNames, subfolderNames = [], []
    for entryName in os.listdir( folderPath ):
        entryPath = os.path.join( folderPath, entryName )
        if os.path.isdir( entryPath ):
            subfolderNames.append( entryName )
        elif os.path.isfile( entryPath ):
            fileNames.append( entryName )
        else:
            logging.error( exp("preload: Not sure what {!r} is in {}!").format( entryPath, folderPath ) )

    # Interlinear folders and Mac archive residue are expected; anything else is worth a mention
    surprisingFolders = [ name for name in subfolderNames
                            if not name.startswith( 'Interlinear_' ) and name not in ('__MACOSX',) ]
    if surprisingFolders:
        logging.info( exp("preload: Surprised to see subfolders in {!r}: {}").format( folderPath, surprisingFolders ) )

    if not fileNames:
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( exp("preload: Couldn't find any files in {!r}").format( folderPath ) )
        raise FileNotFoundError # No use continuing

    self.USFMFilenamesObject = USFMFilenames( folderPath )
    if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
        print( "USFMFilenamesObject", self.USFMFilenamesObject )

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    if self.ssfFilepath is None: # Otherwise the SSF has already been dealt with
        ssfCandidates = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
        if len(ssfCandidates) >= 1:
            if len(ssfCandidates) > 1:
                logging.error( exp("preload: Found multiple possible SSF files -- using first one: {}").format( ssfCandidates ) )
            from PTX7Bible import loadPTX7ProjectData
            ptxSettings = loadPTX7ProjectData( self, ssfCandidates[0] )
            if ptxSettings:
                if self.suppliedMetadata is None:
                    self.suppliedMetadata = {}
                self.suppliedMetadata.setdefault( 'PTX', {} )['SSF'] = ptxSettings
                self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

    # Index the book files: a list of (BBB,filename) 2-tuples plus a BBB -> filename lookup
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples()
    self.possibleFilenameDict = { bookCode:bookFilename
                                    for bookCode,bookFilename in self.maximumPossibleFilenameTuples }

    self.preloadDone = True
class USFMBible( Bible ):
    """
    A Bible whose books are USFM files held together in one source folder.

    Construction only records settings.  preload() inspects the folder and any
    SSF settings file, and loadBooks() / load() read the books themselves
    (calling preload() first if it has not been done).
    """

    def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
        """
        Record the caller's settings -- no files are touched here.

        sourceFolder may be None if it isn't known yet.
        """
        Bible.__init__( self ) # The base class has to be set up before our own fields
        self.objectNameString = 'USFM Bible object'
        self.objectTypeString = 'USFM'

        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.abbreviation = givenAbbreviation
        self.encoding = encoding
        self.ssfFilepath = None
    # end of USFMBible.__init_


    def preload( self ):
        """
        Inspect self.sourceFolder, pick up Paratext SSF settings if any, and index the book files.

        Raises FileNotFoundError if the folder contains no files.
        On success self.USFMFilenamesObject, self.maximumPossibleFilenameTuples and
            self.possibleFilenameDict are filled in and self.preloadDone is True.
        """
        folderPath = self.sourceFolder

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( exp("preload() from {}").format( folderPath ) )
            # NOTE(review): the source this was rebuilt from had lost its indentation;
            #   these checks are assumed to belong to the debug/verbose branch -- confirm.
            assert not self.preloadDone
            assert folderPath is not None

        # Classify everything that lives directly in the folder
        fileNames, subfolderNames = [], []
        for entryName in os.listdir( folderPath ):
            entryPath = os.path.join( folderPath, entryName )
            if os.path.isdir( entryPath ):
                subfolderNames.append( entryName )
            elif os.path.isfile( entryPath ):
                fileNames.append( entryName )
            else:
                logging.error( exp("preload: Not sure what {!r} is in {}!").format( entryPath, folderPath ) )

        # Interlinear folders and Mac archive residue are expected; anything else is worth a mention
        surprisingFolders = [ name for name in subfolderNames
                                if not name.startswith( 'Interlinear_' ) and name not in ('__MACOSX',) ]
        if surprisingFolders:
            logging.info( exp("preload: Surprised to see subfolders in {!r}: {}").format( folderPath, surprisingFolders ) )

        if not fileNames:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print( exp("preload: Couldn't find any files in {!r}").format( folderPath ) )
            raise FileNotFoundError # No use continuing

        self.USFMFilenamesObject = USFMFilenames( folderPath )
        if BibleOrgSysGlobals.verbosityLevel > 3 or (BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print( "USFMFilenamesObject", self.USFMFilenamesObject )

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}

        if self.ssfFilepath is None: # Otherwise the SSF has already been dealt with
            ssfCandidates = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
            if len(ssfCandidates) >= 1:
                if len(ssfCandidates) > 1:
                    logging.error( exp("preload: Found multiple possible SSF files -- using first one: {}").format( ssfCandidates ) )
                from PTX7Bible import loadPTX7ProjectData
                ptxSettings = loadPTX7ProjectData( self, ssfCandidates[0] )
                if ptxSettings:
                    if self.suppliedMetadata is None:
                        self.suppliedMetadata = {}
                    self.suppliedMetadata.setdefault( 'PTX', {} )['SSF'] = ptxSettings
                    self.applySuppliedMetadata( 'SSF' ) # Copy some to BibleObject.settingsDict

        # Index the book files: a list of (BBB,filename) 2-tuples plus a BBB -> filename lookup
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples()
        self.possibleFilenameDict = { bookCode:bookFilename
                                        for bookCode,bookFilename in self.maximumPossibleFilenameTuples }

        self.preloadDone = True
    # end of USFMBible.preload


    def loadBook( self, BBB, filename=None ):
        """
        Load one book and stash it, unless it is already loaded or was already attempted.

        A book flagged in self.bookNeedsReloading bypasses both of those shortcuts.
        If filename is None, the one recorded by preload() for BBB is used;
            FileNotFoundError is raised if there isn't one.
        A file that yields no raw lines is logged as blank and not stashed.

        NOTE: You should ensure that preload() has been called first.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( "USFMBible.loadBook( {}, {} )".format( BBB, filename ) )
            # NOTE(review): assumed to sit inside the debug/verbose branch (indentation was lost) -- confirm.
            assert self.preloadDone

        reloadRequested = BBB in self.bookNeedsReloading and self.bookNeedsReloading[BBB]
        if not reloadRequested:
            if BBB in self.books:
                if BibleOrgSysGlobals.debugFlag:
                    print( " {} is already loaded -- returning".format( BBB ) )
                return # Already loaded
            if BBB in self.triedLoadingBook:
                logging.warning( "We had already tried loading USFM {} for {}".format( BBB, self.name ) )
                return # We've already attempted to load this book

        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print( _(" USFMBible: Loading {} from {} from {}…").format( BBB, self.name, self.sourceFolder ) )

        if filename is None:
            if BBB in self.possibleFilenameDict:
                filename = self.possibleFilenameDict[BBB]
            if filename is None:
                raise FileNotFoundError( "USFMBible.loadBook: Unable to find file for {}".format( BBB ) )

        thisBook = USFMBibleBook( self, BBB )
        thisBook.load( filename, self.sourceFolder, self.encoding )
        if not thisBook._rawLines:
            logging.info( "USFM book {} was completely blank".format( BBB ) )
        else:
            thisBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
            self.stashBook( thisBook )
        self.bookNeedsReloading[BBB] = False
    # end of USFMBible.loadBook


    def _loadBookMP( self, BBB_Filename_tuple ):
        """
        Multiprocessing version!
        Load one book and return it WITHOUT stashing it (stashing is left to the
            caller as that is not safe for multiprocessing).

        Parameter is a 2-tuple containing BBB and the filename.  Only the BBB is
            used -- the file is looked up in self.possibleFilenameDict.

        Returns the book object (the existing one if BBB was already loaded).
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( exp("loadBookMP( {} )").format( BBB_Filename_tuple ) )

        BBB, suppliedFilename = BBB_Filename_tuple # The unpacking also insists on exactly two items
        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print( " {} is already loaded -- returning".format( BBB ) )
            return self.books[BBB] # Already loaded

        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        showProgress = BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag
        if showProgress:
            print( ' ' + exp("Loading {} from {} from {}…").format( BBB, self.name, self.sourceFolder ) )

        thisBook = USFMBibleBook( self, BBB )
        thisBook.load( self.possibleFilenameDict[BBB], self.sourceFolder, self.encoding )
        thisBook.validateMarkers() # Usually activates InternalBibleBook.processLines()
        if showProgress:
            print( _(" Finishing loading USFM book {}.").format( BBB ) )
        return thisBook
    # end of USFMBible.loadBookMP


    def loadBooks( self ):
        """
        Load every book found by preload() (running preload() first if necessary),
            then call self.doPostLoadProcessing().

        A process pool is used if BibleOrgSysGlobals.maxProcesses > 1, otherwise
            the books are loaded one at a time.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( exp("Loading {} from {}…").format( self.name or self.abbreviation, self.sourceFolder ) )

        if not self.preloadDone:
            self.preload()

        bookTuples = self.maximumPossibleFilenameTuples
        if not bookTuples:
            logging.critical( exp("No books to load in {}!").format( self.sourceFolder ) )
        elif BibleOrgSysGlobals.maxProcesses > 1: # Load all the books as quickly as possible
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( _("Loading {} {} books using {} CPUs…").format( len(bookTuples), 'USFM', BibleOrgSysGlobals.maxProcesses ) )
                print( _(" NOTE: Outputs (including error and warning messages) from loading various books may be interspersed.") )
            with multiprocessing.Pool( processes=BibleOrgSysGlobals.maxProcesses ) as pool: # start worker processes
                loadedBooks = pool.map( self._loadBookMP, bookTuples ) # Can only pass a single parameter to map
                assert len(loadedBooks) == len(bookTuples)
                for loadedBook in loadedBooks:
                    self.stashBook( loadedBook ) # Saves them in the correct order
        else: # Just single threaded
            # Load the books one by one -- assuming that they have regular Paratext style filenames
            for bookCode,bookFilename in bookTuples:
                self.loadBook( bookCode, bookFilename ) # also saves it

        self.doPostLoadProcessing()
    # end of USFMBible.loadBooks


    def load( self ):
        """
        Alternative name for loadBooks().
        """
        self.loadBooks()
def preload(self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None):
    """
    Loads the SSF file if it can be found.
    Tries to determine USFM filename pattern.

    sourceFolder is stored as self.sourceFolder.  givenName, givenAbbreviation
    and encoding overwrite self.givenName, self.givenAbbreviation and
    self.encoding only when they are truthy.

    Afterwards self.USFMFilenamesObject, self.name,
    self.maximumPossibleFilenameTuples and self.possibleFilenameDict are set.

    Raises FileNotFoundError if the folder contains no files.
    """
    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        print(
            t("preload( {} {} {} {} )").format(sourceFolder, givenName,
                                               givenAbbreviation, encoding))
    if BibleOrgSysGlobals.debugFlag:
        assert (sourceFolder)
    self.sourceFolder = sourceFolder
    if givenName:
        self.givenName = givenName
    if givenAbbreviation:
        self.givenAbbreviation = givenAbbreviation
    if encoding:
        self.encoding = encoding

    # Do a preliminary check on the contents of our folder
    # (the "__init__:" prefix in the messages below is kept unchanged from the original text)
    foundFiles, foundFolders = [], []
    for something in os.listdir(self.sourceFolder):
        somepath = os.path.join(self.sourceFolder, something)
        if os.path.isdir(somepath):
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            foundFiles.append(something)
        else:
            logging.error(
                t("__init__: Not sure what {!r} is in {}!").format(
                    somepath, self.sourceFolder))
    if foundFolders:
        unexpectedFolders = []
        for folderName in foundFolders:
            if folderName.startswith('Interlinear_'):
                continue
            # This must be a real one-element tuple: without the trailing comma
            # the parentheses just wrap a string and `in` does a SUBSTRING
            # test, so folders like 'MAC' or '_' were silently skipped too.
            if folderName in ('__MACOSX', ):
                continue
            unexpectedFolders.append(folderName)
        if unexpectedFolders:
            logging.info(
                t("__init__: Surprised to see subfolders in {!r}: {}").format(
                    self.sourceFolder, unexpectedFolders))
    if not foundFiles:
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print(
                t("__init__: Couldn't find any files in {!r}").format(
                    self.sourceFolder))
        raise FileNotFoundError  # No use continuing

    self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
    if BibleOrgSysGlobals.verbosityLevel > 3 or (
            BibleOrgSysGlobals.debugFlag and debuggingThisModule):
        print("USFMFilenamesObject", self.USFMFilenamesObject)

    if self.ssfFilepath is None:  # it might have been loaded first
        # Attempt to load the SSF file -- only when there is exactly one candidate
        self.ssfDict, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.loadSSFData(ssfFilepathList[0])

    # Choose a display name: explicit name, then SSF settings, then the folder
    # name, then a fixed fallback
    self.name = self.givenName
    if self.name is None:
        for field in ('FullName', 'Name', ):
            if field in self.settingsDict:
                self.name = self.settingsDict[field]
                break
    if not self.name:
        self.name = os.path.basename(self.sourceFolder)
    if not self.name:
        self.name = os.path.basename(
            self.sourceFolder[:-1])  # Remove the final slash
    if not self.name:
        self.name = "USFM Bible"

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
    )  # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = {}
    for BBB, filename in self.maximumPossibleFilenameTuples:
        self.possibleFilenameDict[BBB] = filename
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.

    The constructor runs preload() straight away when it is given a source
    folder; load() then reads all the books.
    """

    def __init__(self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None):
        """
        Create the internal USFM Bible object.

        Note that sourceFolder can be None if we don't know that yet; in that
        case preload() is not run here and has to be called later with the
        folder.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding
        self.ssfFilepath, self.ssfDict, self.settingsDict = None, {}, {}
        if sourceFolder is not None:
            self.preload(sourceFolder)
    # end of USFMBible.__init_

    def preload(self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None):
        """
        Loads the SSF file if it can be found.
        Tries to determine USFM filename pattern.

        sourceFolder is stored as self.sourceFolder.  givenName,
        givenAbbreviation and encoding overwrite the values given to the
        constructor only when they are truthy.

        Afterwards self.USFMFilenamesObject, self.name,
        self.maximumPossibleFilenameTuples and self.possibleFilenameDict are
        set.

        Raises FileNotFoundError if the folder contains no files.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                t("preload( {} {} {} {} )").format(sourceFolder, givenName,
                                                   givenAbbreviation, encoding))
        if BibleOrgSysGlobals.debugFlag:
            assert (sourceFolder)
        self.sourceFolder = sourceFolder
        if givenName:
            self.givenName = givenName
        if givenAbbreviation:
            # __init__ stores this value as self.abbreviation, so update that
            # too; self.givenAbbreviation is kept for any existing readers.
            self.givenAbbreviation = givenAbbreviation
            self.abbreviation = givenAbbreviation
        if encoding:
            self.encoding = encoding

        # Do a preliminary check on the contents of our folder
        # (the "__init__:" prefix in the messages below is kept unchanged from the original text)
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath):
                foundFolders.append(something)
            elif os.path.isfile(somepath):
                foundFiles.append(something)
            else:
                logging.error(
                    t("__init__: Not sure what {!r} is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName.startswith('Interlinear_'):
                    continue
                # This must be a real one-element tuple: without the trailing
                # comma the parentheses just wrap a string and `in` does a
                # SUBSTRING test, so folders like 'MAC' or '_' were skipped too.
                if folderName in ('__MACOSX', ):
                    continue
                unexpectedFolders.append(folderName)
            if unexpectedFolders:
                logging.info(
                    t("__init__: Surprised to see subfolders in {!r}: {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(
                    t("__init__: Couldn't find any files in {!r}").format(
                        self.sourceFolder))
            raise FileNotFoundError  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        if self.ssfFilepath is None:  # it might have been loaded first
            # Attempt to load the SSF file -- only when there is exactly one candidate
            self.ssfDict, self.settingsDict = {}, {}
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
                searchAbove=True, auto=True)
            if len(ssfFilepathList) == 1:  # Seems we found the right one
                self.loadSSFData(ssfFilepathList[0])

        # Choose a display name: explicit name, then SSF settings, then the
        # folder name, then a fixed fallback
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name', ):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name:
            self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name:
            self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.possibleFilenameDict[BBB] = filename
    # end of USFMBible.preload

    def loadSSFData(self, ssfFilepath, encoding=None):
        """
        Process the SSF data from the given filepath.

        The file is read line by line (as 'utf-8' unless an encoding is given).
        Between <ScriptureText> and </ScriptureText>, each line is expected to
        hold one complete field:
            <Field>contents</Field>          stored as  contents
            <Field attrs>contents</Field>    stored as  (contents, attrs)
            <Field/>  or  <Field />          stored as  ''
            <Field attrs />                  stored as  ('', attrs)
        Any other non-blank line is reported on stdout as unexpected.

        Nothing is returned.  The settings are stored in self.ssfDict, with a
        copy in self.settingsDict, and self.ssfFilepath is set.  If
        self.encoding is None and the SSF has an 'Encoding' entry, self.encoding
        is set from it ('65001' gives 'utf-8'; any other all-digit value N
        gives 'cpN').
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                t("Loading SSF data from {!r} ({})").format(
                    ssfFilepath, encoding))
        if encoding is None:
            encoding = 'utf-8'
        self.ssfFilepath = ssfFilepath

        # status: 0 = before <ScriptureText>, 1 = inside it, 9 = finished
        lineCount, status, settingsDict = 0, 0, {}
        with open(ssfFilepath, encoding=encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        t("loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                          ).format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # strip() also removes the trailing newline, and unlike indexing
                # line[-1] it is safe if only the BOM was on the line
                line = line.strip()
                if not line:
                    continue  # Just discard blank lines
                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 9
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith('/>'):
                    # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(
                        ' />') else line[1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if BibleOrgSysGlobals.debugFlag:
                            assert (len(bits) == 2)
                        fieldname = bits[0]
                        attributes = bits[1]
                        # A self-closing field has no contents.  (This used to
                        # store `contents`, which at this point is either
                        # undefined or left over from an earlier line.)
                        settingsDict[fieldname] = ('', attributes)
                        processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    ix1 = line.find('>')
                    ix2 = line.find('</')
                    if ix1 != -1 and ix2 != -1 and ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        if ' ' not in fieldname and line[ix2 + 2:-1] == fieldname:
                            settingsDict[fieldname] = contents
                            processed = True
                        elif ' ' in fieldname:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if BibleOrgSysGlobals.debugFlag:
                                assert (len(bits) == 2)
                            fieldname = bits[0]
                            attributes = bits[1]
                            if line[ix2 + 2:-1] == fieldname:
                                settingsDict[fieldname] = (contents, attributes)
                                processed = True
                # (A further branch comparing line[0] with '<ValidCharacters>'
                #  was removed: a single character can never equal that string,
                #  and such lines are already caught by the branch above.)
                if not processed:
                    print(
                        _("ERROR: Unexpected {} line in SSF file").format(
                            repr(line)))
        if status == 0:
            logging.error("SSF file was empty: {}".format(self.ssfFilepath))
            status = 9
        if BibleOrgSysGlobals.debugFlag:
            assert (status == 9)
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" " + t("Got {} SSF entries:").format(len(settingsDict)))
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for key in sorted(settingsDict):
                try:
                    print(" {}: {}".format(key, settingsDict[key]))
                except UnicodeEncodeError:
                    print(" {}: UNICODE ENCODING ERROR".format(key))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy(
        )  # This will be all the combined settings

        # Determine our encoding while we're at it
        if self.encoding is None and 'Encoding' in self.ssfDict:  # See if the SSF file gives some help to us
            ssfEncoding = self.ssfDict['Encoding']
            if ssfEncoding == '65001':
                self.encoding = 'utf-8'
            else:
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print(
                        t("__init__: File encoding in SSF is set to {!r}").
                        format(ssfEncoding))
                if ssfEncoding.isdigit():
                    self.encoding = 'cp' + ssfEncoding
                    if BibleOrgSysGlobals.verbosityLevel > 0:
                        print(
                            t("__init__: Switched to {!r} file encoding").
                            format(self.encoding))
                else:
                    logging.critical(
                        t("__init__: Unsure how to handle {!r} file encoding").
                        format(ssfEncoding))
    # end of USFMBible.loadSSFData

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book into self.books if it's not already loaded.

        Does nothing if the book is already in self.books, or (after logging a
        warning) if a load was already attempted.  If filename is None, the one
        recorded by preload() for BBB is used.  A file that yields no raw lines
        is logged as blank and not saved.

        Raises FileNotFoundError if no filename is known for BBB.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))
        if BBB in self.books:
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading USFM {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _(" USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        if filename is None and BBB in self.possibleFilenameDict:
            filename = self.possibleFilenameDict[BBB]
        if filename is None:
            raise FileNotFoundError(
                "USFMBible.loadBook: Unable to find file for {}".format(BBB))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(filename, self.sourceFolder, self.encoding)
        if UBB._rawLines:
            UBB.validateMarkers(
            )  # Usually activates InternalBibleBook.processLines()
            self.saveBook(UBB)
        else:
            logging.info("USFM book {} was completely blank".format(BBB))
    # end of USFMBible.loadBook

    def _loadBookMP(self, BBB_Filename):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.  Only the BBB
        is used -- the file is looked up in self.possibleFilenameDict.

        Returns the loaded book object.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(t("loadBookMP( {} )").format(BBB_Filename))
        BBB, filename = BBB_Filename
        assert (BBB not in self.books)
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(' ' + t("Loading {} from {} from {}...").format(
                BBB, self.name, self.sourceFolder))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(self.possibleFilenameDict[BBB], self.sourceFolder,
                 self.encoding)
        UBB.validateMarkers(
        )  # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_(" Finishing loading USFM book {}.").format(BBB))
        return UBB
    # end of USFMBible.loadBookMP

    def load(self):
        """
        Load all the books.

        A process pool is used if BibleOrgSysGlobals.maxProcesses > 1,
        otherwise the books are loaded one at a time.  Either way
        self.doPostLoadProcessing() is called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                t("Loading {} from {}...").format(self.name,
                                                  self.sourceFolder))
        if self.maximumPossibleFilenameTuples:
            if BibleOrgSysGlobals.maxProcesses > 1:  # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        t("Loading {} books using {} CPUs...").format(
                            len(self.maximumPossibleFilenameTuples),
                            BibleOrgSysGlobals.maxProcesses))
                    print(
                        " NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                    )
                with multiprocessing.Pool(
                        processes=BibleOrgSysGlobals.maxProcesses
                ) as pool:  # start worker processes
                    # Can only pass a single parameter to map, hence the 2-tuples
                    results = pool.map(self._loadBookMP,
                                       self.maximumPossibleFilenameTuples
                                       )  # have the pool do our loads
                    assert (len(results) == len(
                        self.maximumPossibleFilenameTuples))
                    for bBook in results:
                        self.saveBook(bBook)  # Saves them in the correct order
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    self.loadBook(BBB, filename)  # also saves it
        else:
            logging.critical(
                t("No books to load in {}!").format(self.sourceFolder))
        self.doPostLoadProcessing()
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.

    Construction only records settings.  preload() inspects the source folder
    and any SSF settings file, and loadBooks() / load() read the books
    themselves (calling preload() first if it has not been done).
    """

    def __init__(self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None):
        """
        Create the internal USFM Bible object.

        Note that sourceFolder can be None if we don't know that yet.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'USFM Bible object'
        self.objectTypeString = 'USFM'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding
        self.ssfFilepath = None
    # end of USFMBible.__init_

    def preload(self):
        """
        Loads the SSF file if it can be found.
        Tries to determine USFM filename pattern.

        Raises FileNotFoundError if self.sourceFolder contains no files.

        On success self.USFMFilenamesObject, self.maximumPossibleFilenameTuples
        and self.possibleFilenameDict are filled in, every book code found is
        added to self.availableBBBs, and self.preloadDone is True.  If an SSF
        file is found and yields settings, they are stored at
        self.suppliedMetadata['PTX7']['SSF'] and applied with
        self.applySuppliedMetadata('SSF').
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(exp("preload() from {}").format(self.sourceFolder))
            # NOTE(review): the source this was rebuilt from had lost its
            # indentation; these checks are assumed to belong to the
            # debug/verbose branch -- confirm.
            assert not self.preloadDone
            assert self.sourceFolder is not None

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath):
                foundFolders.append(something)
            elif os.path.isfile(somepath):
                foundFiles.append(something)
            else:
                logging.error(
                    exp("preload: Not sure what {!r} is in {}!").format(
                        somepath, self.sourceFolder))
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName.startswith('Interlinear_'):
                    continue
                if folderName in ('__MACOSX', ):
                    continue
                unexpectedFolders.append(folderName)
            if unexpectedFolders:
                logging.info(
                    exp("preload: Surprised to see subfolders in {!r}: {}").
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(
                    exp("preload: Couldn't find any files in {!r}").format(
                        self.sourceFolder))
            raise FileNotFoundError  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if BibleOrgSysGlobals.verbosityLevel > 3 or (
                BibleOrgSysGlobals.debugFlag and debuggingThisModule):
            print("USFMFilenamesObject", self.USFMFilenamesObject)

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        if self.ssfFilepath is None:  # it might have been loaded first
            # Attempt to load the SSF file
            ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
                searchAbove=True, auto=True)
            if len(ssfFilepathList) > 1:
                logging.error(
                    exp("preload: Found multiple possible SSF files -- using first one: {}"
                        ).format(ssfFilepathList))
            if len(ssfFilepathList) >= 1:  # Seems we found the right one
                from PTX7Bible import loadPTX7ProjectData
                PTXSettingsDict = loadPTX7ProjectData(self, ssfFilepathList[0])
                if PTXSettingsDict:
                    if self.suppliedMetadata is None:
                        self.suppliedMetadata = {}
                    if 'PTX7' not in self.suppliedMetadata:
                        self.suppliedMetadata['PTX7'] = {}
                    self.suppliedMetadata['PTX7']['SSF'] = PTXSettingsDict
                    self.applySuppliedMetadata(
                        'SSF')  # Copy some to BibleObject.settingsDict

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = {}
        for BBB, filename in self.maximumPossibleFilenameTuples:
            self.availableBBBs.add(BBB)
            self.possibleFilenameDict[BBB] = filename

        self.preloadDone = True
    # end of USFMBible.preload

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book into self.books if it's not already loaded.

        A book flagged in self.bookNeedsReloading is loaded again regardless.
        If filename is None, the one recorded by preload() for BBB is used.
        A file that yields no raw lines is logged as blank and not stashed.

        Raises FileNotFoundError if no filename is known for BBB.

        NOTE: You should ensure that preload() has been called first.
        """
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))
            # NOTE(review): assumed to sit inside the debug/verbose branch
            # (indentation was lost) -- confirm.
            assert self.preloadDone

        if BBB not in self.bookNeedsReloading or not self.bookNeedsReloading[BBB]:
            if BBB in self.books:
                if BibleOrgSysGlobals.debugFlag:
                    print(" {} is already loaded -- returning".format(BBB))
                return  # Already loaded
            if BBB in self.triedLoadingBook:
                logging.warning(
                    "We had already tried loading USFM {} for {}".format(
                        BBB, self.name))
                return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _(" USFMBible: Loading {} from {} from {}…").format(
                    BBB, self.name, self.sourceFolder))
        if filename is None and BBB in self.possibleFilenameDict:
            filename = self.possibleFilenameDict[BBB]
        if filename is None:
            raise FileNotFoundError(
                "USFMBible.loadBook: Unable to find file for {}".format(BBB))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(filename, self.sourceFolder, self.encoding)
        if UBB._rawLines:
            UBB.validateMarkers(
            )  # Usually activates InternalBibleBook.processLines()
            self.stashBook(UBB)
        else:
            logging.info("USFM book {} was completely blank".format(BBB))
        self.bookNeedsReloading[BBB] = False
    # end of USFMBible.loadBook

    def _loadBookMP(self, BBB_Filename_duple):
        """
        Multiprocessing version!
        Load the requested book if it's not already loaded (but doesn't save it as that is not safe for multiprocessing)

        Parameter is a 2-tuple containing BBB and the filename.  Only the BBB
        is used -- the file is looked up in self.possibleFilenameDict.

        Returns the book info (the existing book if BBB was already loaded).
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(exp("loadBookMP( {} )").format(BBB_Filename_duple))
        BBB, filename = BBB_Filename_duple
        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print(" {} is already loaded -- returning".format(BBB))
            return self.books[BBB]  # Already loaded
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(' ' + exp("Loading {} from {} from {}…").format(
                BBB, self.name, self.sourceFolder))
        UBB = USFMBibleBook(self, BBB)
        UBB.load(self.possibleFilenameDict[BBB], self.sourceFolder,
                 self.encoding)
        UBB.validateMarkers(
        )  # Usually activates InternalBibleBook.processLines()
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_(" Finishing loading USFM book {}.").format(BBB))
        return UBB
    # end of USFMBible.loadBookMP

    def loadBooks(self):
        """
        Load all the Bible books.

        Runs preload() first if that hasn't been done.  A process pool is used
        when BibleOrgSysGlobals.maxProcesses > 1 and
        BibleOrgSysGlobals.alreadyMultiprocessing is not set; otherwise the
        books are loaded one at a time.  Either way
        self.doPostLoadProcessing() is called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(
                exp("Loading {} from {}…").format(self.getAName(),
                                                  self.sourceFolder))

        if not self.preloadDone:
            self.preload()

        if self.maximumPossibleFilenameTuples:
            if BibleOrgSysGlobals.maxProcesses > 1 \
            and not BibleOrgSysGlobals.alreadyMultiprocessing:
                # Get our subprocesses ready and waiting for work
                # Load all the books as quickly as possible
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        _("Loading {} {} books using {} CPUs…").format(
                            len(self.maximumPossibleFilenameTuples), 'USFM',
                            BibleOrgSysGlobals.maxProcesses))
                    print(
                        _(" NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."
                          ))
                BibleOrgSysGlobals.alreadyMultiprocessing = True
                try:
                    with multiprocessing.Pool(
                            processes=BibleOrgSysGlobals.maxProcesses
                    ) as pool:  # start worker processes
                        # Can only pass a single parameter to map, hence the 2-tuples
                        results = pool.map(self._loadBookMP,
                                           self.maximumPossibleFilenameTuples
                                           )  # have the pool do our loads
                        assert len(results) == len(
                            self.maximumPossibleFilenameTuples)
                        for bBook in results:
                            self.stashBook(
                                bBook)  # Saves them in the correct order
                finally:
                    # Always clear the flag: if a worker raised and the flag
                    # stayed True, every later load in this process would
                    # silently fall back to single-threaded.
                    BibleOrgSysGlobals.alreadyMultiprocessing = False
            else:  # Just single threaded
                # Load the books one by one -- assuming that they have regular Paratext style filenames
                for BBB, filename in self.maximumPossibleFilenameTuples:
                    self.loadBook(BBB, filename)  # also saves it
        else:
            logging.critical(
                exp("No books to load in {}!").format(self.sourceFolder))
        self.doPostLoadProcessing()
    # end of USFMBible.loadBooks

    def load(self):
        """
        Alternative name for loadBooks().
        """
        self.loadBooks()
def _findESFMFilenameTuples( folderName ):
    """
    Create a USFMFilenames object for the given folder and filter its
        maximum possible (BBB,filename) 2-tuples down to those whose filename
        ends with one of the filenameEndingsToAccept.

    Returns a (USFMFilenames object, list of accepted 2-tuples) pair.
    """
    UFns = USFMFilenames( folderName ) # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2: print( UFns )
    acceptedEndings = tuple( filenameEndingsToAccept ) # str.endswith() needs a tuple for multiple endings
    # Build a new list rather than removing entries from the list that UFns gave us
    filenameTuples = [(BBB,fn) for BBB,fn in UFns.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
                                if fn.endswith( acceptedEndings )] # Only accept our specific file extensions
    if BibleOrgSysGlobals.verbosityLevel > 3: print( " Confirmed:", len(filenameTuples), filenameTuples )
    return UFns, filenameTuples
# end of _findESFMFilenameTuples


def ESFMBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Returns False if the given folder is unreadable or is not a folder.

    Otherwise returns the number of ESFM Bibles found (which may be zero),
        except that if exactly one is found and autoLoad or autoLoadBooks is true,
        the ESFMBible object is returned instead
        (with its books loaded if autoLoadBooks is true).

    If the given folder itself contains ESFM files, the subfolders are not searched.

    strictCheck is only displayed in the debug output -- it doesn't affect the search.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck( {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad ) )
    if BibleOrgSysGlobals.debugFlag: assert( givenFolderName and isinstance( givenFolderName, str ) )
    if BibleOrgSysGlobals.debugFlag: assert( autoLoad in (True,False,) and autoLoadBooks in (True,False) )

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("ESFMBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Find all the subfolders in this folder (needed if we have to look one level down)
    if BibleOrgSysGlobals.verbosityLevel > 3: print( " ESFMBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders = [something for something in os.listdir( givenFolderName )
                        if something != '__MACOSX' # don't visit these directories
                        and os.path.isdir( os.path.join( givenFolderName, something ) )]

    # See if there's an ESFMBible project here in this given folder
    numFound = 0
    UFns, filenameTuples = _findESFMFilenameTuples( givenFolderName )
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
        print( " Found {} ESFM file{}.".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
    if filenameTuples:
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got SSFs:", SSFs )
        # NOTE(review): the project is counted whether or not a SSF file was found -- confirm that's intended
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck got", numFound, givenFolderName )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible( givenFolderName )
            if autoLoadBooks: eB.load() # Load and process the file
            return eB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3: print( " ESFMBibleFileCheck: Looking for files in {}".format( tryFolderName ) )

        # See if there's an ESFM Bible here in this folder
        UFns, filenameTuples = _findESFMFilenameTuples( tryFolderName )
        if not filenameTuples: continue
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( " Found {} ESFM files: {}".format( len(filenameTuples), filenameTuples ) )
        elif BibleOrgSysGlobals.verbosityLevel > 1:
            print( " Found {} ESFM file{}".format( len(filenameTuples), '' if len(filenameTuples)==1 else 's' ) )
        SSFs = UFns.getSSFFilenames( searchAbove=True )
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2: print( "Got SSFs:", SSFs )
        foundProjects.append( tryFolderName )
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ESFMBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible( foundProjects[0] )
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
    return numFound # i.e., zero -- nothing found at either level
# end of ESFMBibleFileCheck
def __init__( self, sourceFolder, givenName=None, givenAbbreviation=None, encoding=None ):
    """
    Create the internal USFM Bible object.

    sourceFolder is the folder which is searched for the USFM book files
        (and for a single SSF settings file, also searching above it).
    givenName, if not None, is used as the name of this Bible,
        otherwise the 'FullName' or 'Name' SSF setting is used if available,
        then the basename of sourceFolder, then "USFM Bible".
    givenAbbreviation is saved as self.abbreviation.
    encoding is the file encoding to use when loading books.
        If it's None and the SSF file has an 'Encoding' entry, it's derived from that
        ('65001' gives 'utf-8', other numbers give 'cp' + the number).

    Raises FileNotFoundError if sourceFolder contains no files at all.
    """
    # Setup and initialise the base class first
    Bible.__init__( self )
    self.objectNameString = "USFM Bible object"
    self.objectTypeString = "USFM"

    # Now we can set our object variables
    self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = [], []
    for something in os.listdir( self.sourceFolder ):
        somepath = os.path.join( self.sourceFolder, something )
        if os.path.isdir( somepath ): foundFolders.append( something )
        elif os.path.isfile( somepath ): foundFiles.append( something )
        else: logging.error( t("__init__: Not sure what '{}' is in {}!").format( somepath, self.sourceFolder ) )
    if foundFolders:
        # The trailing comma matters: ('__MACOSX') is just a string,
        #   and so 'in' would do a substring test (skipping folders like 'MAC' or 'OS')
        unexpectedFolders = [folderName for folderName in foundFolders
                                if not folderName.startswith( 'Interlinear_' )
                                and folderName not in ('__MACOSX',)]
        if unexpectedFolders:
            logging.info( t("__init__: Surprised to see subfolders in '{}': {}").format( self.sourceFolder, unexpectedFolders ) )
    if not foundFiles:
        if Globals.verbosityLevel > 0: print( t("__init__: Couldn't find any files in '{}'").format( self.sourceFolder ) )
        raise FileNotFoundError( "No files found in '{}'".format( self.sourceFolder ) ) # No use continuing

    self.USFMFilenamesObject = USFMFilenames( self.sourceFolder )
    if Globals.verbosityLevel > 3 or (Globals.debugFlag and debuggingThisModule):
        print( "USFMFilenamesObject", self.USFMFilenamesObject )

    # Attempt to load the SSF file
    self.ssfFilepath, self.settingsDict = {}, {}
    ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames( searchAbove=True, auto=True )
    if len(ssfFilepathList) == 1: # Seems we found the right one
        self.ssfFilepath = ssfFilepathList[0]
        self.loadSSFData( self.ssfFilepath )
        # NOTE(review): presumably loadSSFData() sets self.ssfDict, so it's only consulted here after a load -- confirm
        if self.encoding is None and 'Encoding' in self.ssfDict: # See if the SSF file gives some help to us
            ssfEncoding = self.ssfDict['Encoding']
            if ssfEncoding == '65001': self.encoding = 'utf-8'
            else:
                if Globals.verbosityLevel > 0:
                    print( t("__init__: File encoding in SSF is set to '{}'").format( ssfEncoding ) )
                if ssfEncoding.isdigit():
                    self.encoding = 'cp' + ssfEncoding
                    if Globals.verbosityLevel > 0:
                        print( t("__init__: Switched to '{}' file encoding").format( self.encoding ) )
                else:
                    logging.critical( t("__init__: Unsure how to handle '{}' file encoding").format( ssfEncoding ) )

    # Work out a name for this Bible
    self.name = self.givenName
    if self.name is None:
        for field in ('FullName','Name',):
            if field in self.settingsDict: self.name = self.settingsDict[field]; break
    if not self.name: self.name = os.path.basename( self.sourceFolder )
    if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
    if not self.name: self.name = "USFM Bible"

    # Find the filenames of all our books
    self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples() # Returns (BBB,filename) 2-tuples
    self.possibleFilenameDict = dict( self.maximumPossibleFilenameTuples ) # Later entries win for a repeated BBB
def _findESFMFilenameTuples(folderName):
    """
    Create a USFMFilenames object for the given folder and filter its
    maximum possible (BBB,filename) 2-tuples down to those whose filename
    ends with one of the filenameEndingsToAccept.

    Returns a (USFMFilenames object, list of accepted 2-tuples) pair.
    """
    UFns = USFMFilenames(folderName)  # Assuming they have standard Paratext style filenames
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(UFns)
    acceptedEndings = tuple(filenameEndingsToAccept)  # str.endswith() needs a tuple for multiple endings
    # Build a new list rather than removing entries from the list that UFns gave us
    filenameTuples = [
        (BBB, fn)
        for BBB, fn in UFns.getMaximumPossibleFilenameTuples()  # Returns (BBB,filename) 2-tuples
        if fn.endswith(acceptedEndings)  # Only accept our specific file extensions
    ]
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" Confirmed:", len(filenameTuples), filenameTuples)
    return UFns, filenameTuples


def ESFMBibleFileCheck(givenFolderName,
                       strictCheck=True,
                       autoLoad=False,
                       autoLoadBooks=False):
    """
    Given a folder, search for ESFM Bible files or folders in the folder and in the next level down.

    Returns False if the given folder is unreadable or is not a folder.

    Otherwise returns the number of ESFM Bibles found (which may be zero),
    except that if exactly one is found and autoLoad or autoLoadBooks is true,
    the ESFMBible object is returned instead
    (with its books loaded if autoLoadBooks is true).

    If the given folder itself contains ESFM files, the subfolders are not searched.

    strictCheck is only displayed in the debug output -- it doesn't affect the search.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("ESFMBibleFileCheck( {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert (givenFolderName and isinstance(givenFolderName, str))
    if BibleOrgSysGlobals.debugFlag:
        assert (autoLoad in (True, False, ) and autoLoadBooks in (True, False))

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("ESFMBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Find all the subfolders in this folder (needed if we have to look one level down)
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" ESFMBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders = [
        something for something in os.listdir(givenFolderName)
        if something != '__MACOSX'  # don't visit these directories
        and os.path.isdir(os.path.join(givenFolderName, something))
    ]

    # See if there's an ESFMBible project here in this given folder
    numFound = 0
    UFns, filenameTuples = _findESFMFilenameTuples(givenFolderName)
    if BibleOrgSysGlobals.verbosityLevel > 1 and filenameTuples:
        print(" Found {} ESFM file{}.".format(
            len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
    if filenameTuples:
        SSFs = UFns.getSSFFilenames()
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
            print("Got SSFs:", SSFs)
        # NOTE(review): the project is counted whether or not a SSF file was found -- confirm that's intended
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck got", numFound, givenFolderName)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            eB = ESFMBible(givenFolderName)
            if autoLoadBooks:
                eB.load()  # Load and process the file
            return eB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("ESFMBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" ESFMBibleFileCheck: Looking for files in {}".format(
                tryFolderName))

        # See if there's an ESFM Bible here in this folder
        UFns, filenameTuples = _findESFMFilenameTuples(tryFolderName)
        if not filenameTuples:
            continue
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" Found {} ESFM files: {}".format(len(filenameTuples),
                                                    filenameTuples))
        elif BibleOrgSysGlobals.verbosityLevel > 1:
            print(" Found {} ESFM file{}".format(
                len(filenameTuples), '' if len(filenameTuples) == 1 else 's'))
        SSFs = UFns.getSSFFilenames(searchAbove=True)
        if SSFs and BibleOrgSysGlobals.verbosityLevel > 2:
            print("Got SSFs:", SSFs)
        foundProjects.append(tryFolderName)
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ESFMBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = ESFMBible(foundProjects[0])
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
    return numFound  # i.e., zero -- nothing found at either level
class USFMBible(Bible):
    """
    Class to load and manipulate USFM Bibles.
    """

    def __init__(self,
                 sourceFolder,
                 givenName=None,
                 givenAbbreviation=None,
                 encoding='utf-8'):
        """
        Create the internal USFM Bible object.

        sourceFolder is the folder which is searched for the USFM book files
            (and for a single SSF settings file, also searching above it).
        givenName, if not None, is used as the name of this Bible,
            otherwise the 'FullName' or 'Name' SSF setting is used if available,
            then the basename of sourceFolder, then "USFM Bible".
        givenAbbreviation is saved as self.abbreviation.
        encoding is the file encoding passed on when loading each book.

        If sourceFolder contains no files at all, this returns early
            without setting the filename attributes (load() copes with that).
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "USFM Bible object"
        self.objectTypeString = "USFM"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.abbreviation, self.encoding = sourceFolder, givenName, givenAbbreviation, encoding

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.sourceFolder):
            somepath = os.path.join(self.sourceFolder, something)
            if os.path.isdir(somepath):
                foundFolders.append(something)
            elif os.path.isfile(somepath):
                foundFiles.append(something)
            else:
                logging.error("Not sure what '{}' is in {}!".format(
                    somepath, self.sourceFolder))
        if foundFolders:
            # The trailing comma matters: ('__MACOSX') is just a string,
            #   and so 'in' would do a substring test (skipping folders like 'MAC' or 'OS')
            unexpectedFolders = [
                folderName for folderName in foundFolders
                if not folderName.startswith('Interlinear_')
                and folderName not in ('__MACOSX', )
            ]
            if unexpectedFolders:
                logging.info(
                    "USFMBible.load: Surprised to see subfolders in '{}': {}".
                    format(self.sourceFolder, unexpectedFolders))
        if not foundFiles:
            if Globals.verbosityLevel > 0:
                print("USFMBible: Couldn't find any files in '{}'".format(
                    self.sourceFolder))
            return  # No use continuing

        self.USFMFilenamesObject = USFMFilenames(self.sourceFolder)
        if Globals.verbosityLevel > 3 or (Globals.debugFlag and
                                          debuggingThisModule):
            print(self.USFMFilenamesObject)

        # Attempt to load the SSF file
        self.ssfFilepath, self.settingsDict = {}, {}
        ssfFilepathList = self.USFMFilenamesObject.getSSFFilenames(
            searchAbove=True, auto=True)
        if len(ssfFilepathList) == 1:  # Seems we found the right one
            self.ssfFilepath = ssfFilepathList[0]
            self.loadSSFData(self.ssfFilepath)

        # Work out a name for this Bible
        self.name = self.givenName
        if self.name is None:
            for field in ('FullName', 'Name', ):
                if field in self.settingsDict:
                    self.name = self.settingsDict[field]
                    break
        if not self.name:
            self.name = os.path.basename(self.sourceFolder)
        if not self.name:
            self.name = os.path.basename(
                self.sourceFolder[:-1])  # Remove the final slash
        if not self.name:
            self.name = "USFM Bible"

        # Find the filenames of all our books
        self.maximumPossibleFilenameTuples = self.USFMFilenamesObject.getMaximumPossibleFilenameTuples(
        )  # Returns (BBB,filename) 2-tuples
        self.possibleFilenameDict = dict(
            self.maximumPossibleFilenameTuples)  # Later entries win for a repeated BBB
    # end of USFMBible.__init_

    def loadSSFData(self, ssfFilepath, encoding='utf-8'):
        """
        Process the SSF data from the given filepath.

        Only one-line fields between the <ScriptureText> and </ScriptureText>
            lines are accepted -- any other non-blank line is reported.
        A field without attributes is saved as fieldname -> contents
            ('' for a self-closing field).
        A field with attributes is saved as fieldname -> (contents, attributes)
            (again with '' as the contents of a self-closing field).

        Doesn't return anything -- the results are saved in self.ssfDict
            and a (shallow) copy of them in self.settingsDict.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading SSF data from '{}'").format(ssfFilepath))
        settingsDict = {}
        status = 0  # 0 = before <ScriptureText>, 1 = inside it, 2 = after </ScriptureText>
        with open(ssfFilepath, encoding=encoding
                  ) as myFile:  # Automatically closes the file when done
            for lineNumber, line in enumerate(myFile, start=1):
                if lineNumber == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "USFMBible.loadSSFData: Detected UTF-16 Byte Order Marker in {}"
                        .format(ssfFilepath))
                    line = line[1:]  # Remove the Byte Order Marker
                # Remove the newline plus leading and trailing whitespace
                #   (also safe if the line is now completely empty)
                line = line.strip()
                if not line:
                    continue  # Just discard blank lines

                processed = False
                if status == 0 and line == "<ScriptureText>":
                    status = 1
                    processed = True
                elif status == 1 and line == "</ScriptureText>":
                    status = 2
                    processed = True
                elif status == 1 and line[0] == '<' and line.endswith(
                        '/>'):  # Handle a self-closing (empty) field
                    fieldname = line[1:-3] if line.endswith(
                        ' />') else line[1:-2]  # Handle it with or without a space
                    if ' ' not in fieldname:
                        settingsDict[fieldname] = ''
                        processed = True
                    else:  # Some fields (like "Naming") may contain attributes
                        bits = fieldname.split(None, 1)
                        if len(bits) == 2:
                            # A self-closing field has no contents, so save an empty string
                            settingsDict[bits[0]] = ('', bits[1])
                            processed = True
                elif status == 1 and line[0] == '<' and line[-1] == '>':
                    # Use find() not index() so that a line without a closing tag
                    #   gets reported below rather than raising a ValueError
                    ix1 = line.find('>')  # Always found because the line ends with '>'
                    ix2 = line.find('</')
                    if ix2 > ix1:
                        fieldname = line[1:ix1]
                        contents = line[ix1 + 1:ix2]
                        closingName = line[ix2 + 2:-1]
                        if ' ' not in fieldname:
                            if closingName == fieldname:
                                settingsDict[fieldname] = contents
                                processed = True
                        else:  # Some fields (like "Naming") may contain attributes
                            bits = fieldname.split(None, 1)
                            if len(bits) == 2 and closingName == bits[0]:
                                settingsDict[bits[0]] = (contents, bits[1])
                                processed = True
                if not processed:
                    print(
                        "ERROR: Unexpected '{}' line in SSF file".format(line))
        if Globals.verbosityLevel > 2:
            print(" " + _("Got {} SSF entries:").format(len(settingsDict)))
            if Globals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print(" {}: {}".format(key, settingsDict[key]))
        self.ssfDict = settingsDict  # We'll keep a copy of just the SSF settings
        self.settingsDict = settingsDict.copy(
        )  # This will be all the combined settings
    # end of USFMBible.loadSSFData

    def loadBook(self, BBB, filename=None):
        """
        Load the requested book if it's not already loaded.

        If filename is None, it's looked up in self.possibleFilenameDict.

        Each book is only attempted once (a warning is logged for any repeat request).
        A book which loads no raw lines at all is not saved.
        """
        if Globals.verbosityLevel > 2:
            print("USFMBible.loadBook( {}, {} )".format(BBB, filename))
        if BBB in self.books:
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading USFM {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            print(
                _(" USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        if filename is None:
            filename = self.possibleFilenameDict[BBB]
        UBB = USFMBibleBook(self.name, BBB)
        UBB.load(filename, self.sourceFolder, self.encoding)
        if UBB._rawLines:
            UBB.validateMarkers(
            )  # Usually activates InternalBibleBook.processLines()
            self.saveBook(UBB)
        else:
            logging.info("USFM book {} was completely blank".format(BBB))
    # end of USFMBible.loadBook

    def loadBookMP(self, BBB):
        """
        Multiprocessing version!
        Load the requested book (which must not already be loaded).

        Returns the book object rather than saving it -- load() does the saving.
        """
        if Globals.verbosityLevel > 2:
            print("USFMBible.loadBookMP( {} )".format(BBB))
        assert (BBB not in self.books)
        self.triedLoadingBook[BBB] = True
        if Globals.verbosityLevel > 2 or Globals.debugFlag:
            print(
                _(" USFMBible: Loading {} from {} from {}...").format(
                    BBB, self.name, self.sourceFolder))
        UBB = USFMBibleBook(self.name, BBB)
        UBB.load(self.possibleFilenameDict[BBB], self.sourceFolder,
                 self.encoding)
        UBB.validateMarkers(
        )  # Usually activates InternalBibleBook.processLines()
        return UBB
    # end of USFMBible.loadBookMP

    def load(self):
        """
        Load all the books.

        Uses a multiprocessing pool if Globals.maxProcesses > 1,
            otherwise loads the books one by one.
        Logs a critical error and returns without doing anything
            if __init__ found no files (and so never set up the list of books).
        """
        if Globals.verbosityLevel > 1:
            print(
                _("USFMBible: Loading {} from {}...").format(
                    self.name, self.sourceFolder))
        # Check this before choosing a branch so that both of them are protected
        try:
            loadDetails = self.maximumPossibleFilenameTuples
        except AttributeError:
            logging.critical("USFMBible.load " + _("has nothing to load!"))
            return
        if Globals.maxProcesses > 1:  # Load all the books as quickly as possible
            parameters = [BBB for BBB, filename in loadDetails
                          ]  # Can only pass a single parameter to map
            with multiprocessing.Pool(processes=Globals.maxProcesses
                                      ) as pool:  # start worker processes
                results = pool.map(self.loadBookMP,
                                   parameters)  # have the pool do our loads
                assert (len(results) == len(parameters))
                for bBook in results:
                    self.saveBook(bBook)
        else:  # Just single threaded
            # Load the books one by one -- assuming that they have regular Paratext style filenames
            for BBB, filename in loadDetails:
                self.loadBook(BBB, filename)  # also saves it
        self.doPostLoadProcessing()