def __validateAndExtractBook(self, book):
    """
    Validate one OpenSong XML book element and extract its chapters.

    The book name is taken from the "n" attribute; every other attribute
    is logged as unprocessed. When the name resolves to a book code
    (via self.genericBOS.getBBB), a BibleBook is created, given
    id/h/mt1 header lines, filled from the chapter subelements and passed
    to self.saveBook(). Otherwise an error is logged and nothing is saved.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating OpenSong XML book..."))

    # Pick the book name out of the element attributes
    bookName = None
    for attrName, attrValue in book.items():
        if attrName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrName, attrValue))
            continue
        bookName = attrValue

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    BBB = self.genericBOS.getBBB(bookName)
    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: {!r}").format(
                bookName))
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Validating {} {}...").format(BBB, bookName))
    bookObject = BibleBook(self, BBB)
    bookObject.objectNameString = "OpenSong XML Bible Book object"
    bookObject.objectTypeString = "OpenSong"

    # Header lines for the new book
    usfmCode = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(BBB)
    bookObject.addLine(
        'id', '{} imported by {}'.format(usfmCode.upper(), ProgNameVersion))
    bookObject.addLine('h', bookName)
    bookObject.addLine('mt1', bookName)

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error(
                "Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.chapterTag, child.tag))
            continue
        sublocation = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'al1d')
        self.__validateAndExtractChapter(BBB, bookObject, child)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results...".format(BBB))
    self.saveBook(bookObject)
def __validateAndExtractBook(self, book, bookNumber):
    """
    Validate one VerseView XML book element and extract its chapters.

    The book name comes from the "n" attribute (other attributes are
    logged as unprocessed) and is looked up via self.genericBOS, retrying
    with accents removed. The code derived from bookNumber then replaces
    the name-based result whenever the two differ. When a book code is
    found, a BibleBook is filled from the chapter subelements and handed
    to self.stashBook().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating XML book…"))

    # Process the book attributes first
    bookName = None
    for attrName, attrValue in book.items():
        if attrName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrName, attrValue))
            continue
        bookName = attrValue

    # First guess: look the name up as text (then again without accents)
    BBB = None
    if bookName:
        BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            plainName = BibleOrgSysGlobals.removeAccents(bookName)
            if plainName != bookName:
                BBB = self.genericBOS.getBBBFromText(plainName)

    # Just double check using the book number -- it wins on any mismatch
    numberBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
        bookNumber)
    if numberBBB != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print("Assuming that book {} {!r} is {} (not {})".format(
                bookNumber, bookName, numberBBB, BBB))
        BBB = numberBBB

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Validating {} {}…").format(BBB, bookName))
    bookObject = BibleBook(self, BBB)
    bookObject.objectNameString = 'VerseView XML Bible Book object'
    bookObject.objectTypeString = 'VerseView'

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != VerseViewXMLBible.chapterTag:
            logging.error(
                "vb26 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.chapterTag, child.tag))
            continue
        sublocation = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'al1d')
        self.__validateAndExtractChapter(BBB, bookObject, child)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results…".format(BBB))
    self.stashBook(bookObject)
def __validateAndExtractBook(self, book):
    """
    Validate one Haggai XML book element and extract its content.

    Reads the "bnumber", "bname" and "bsname" attributes (anything else is
    logged as unprocessed). The book code is deduced from the book number
    first; if there is no number, or the number lookup fails, the book
    name is tried via self.genericBOS.getBBB. When a code is found, a
    BibleBook is created, caption subelements are added as 'mt' lines,
    chapter subelements are handed to __validateAndExtractChapter, and the
    book is passed to self.saveBook(). If no code can be found the book
    element is skipped.
    """
    if Globals.verbosityLevel > 3:
        print(_("Validating XML book..."))

    # Process the book attributes first
    BBB = bookName = bookShortName = bookNumber = None
    for attrib, value in book.items():
        if attrib == "bnumber":
            bookNumber = value
        elif attrib == "bname":
            bookName = value
        elif attrib == "bsname":
            bookShortName = value
        else:
            logging.warning(
                "Unprocessed '{}' attribute ({}) in book element".format(
                    attrib, value))

    if bookNumber:
        try:
            BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumber)
        except (KeyError, ValueError):  # the number could not be resolved
            logging.warning(
                "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                .format(bookNumber, bookName, bookShortName))
    # Fall back to the name whenever the number was absent or unusable,
    # rather than dropping a book that could still be identified
    if BBB is None and bookName:
        BBB = self.genericBOS.getBBB(bookName)

    if not BBB:
        return

    if Globals.verbosityLevel > 2:
        print(_("Validating {} {}...").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = "Haggai XML Bible Book object"
    thisBook.objectTypeString = "Haggai"

    for element in book:
        if element.tag == HaggaiXMLBible.captionTag:
            sublocation = "caption in {}".format(BBB)
            Globals.checkXMLNoAttributes(element, sublocation, 'jhl6')
            Globals.checkXMLNoSubelements(element, sublocation, 'jk21')
            Globals.checkXMLNoTail(element, sublocation, 'kjh6')
            thisBook.appendLine('mt', element.text)
        elif element.tag == HaggaiXMLBible.chapterTag:
            sublocation = "chapter in {}".format(BBB)
            Globals.checkXMLNoText(element, sublocation, 'j3jd')
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, element)
        else:
            logging.error("Expected to find '{}' but got '{}'".format(
                HaggaiXMLBible.chapterTag, element.tag))

    if Globals.verbosityLevel > 2:
        print(" Saving {} into results...".format(BBB))
    self.saveBook(thisBook)
def __validateAndExtractBook( self, book ):
    """
    Validate one Haggai XML book element and extract its content.

    Reads the "bnumber", "bname" and "bsname" attributes (anything else is
    logged as unprocessed). The book code is deduced from the book number
    first; if there is no number, or the number lookup fails, the book
    name is tried via self.genericBOS.getBBBFromText. When a code is found,
    a BibleBook is created, caption subelements are added as 'mt' lines,
    chapter subelements are handed to __validateAndExtractChapter, and the
    book is passed to self.stashBook(). If no code can be found the book
    element is skipped.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( _("Validating XML book…") )

    # Process the book attributes first
    BBB = bookName = bookShortName = bookNumber = None
    for attrib,value in book.items():
        if attrib=="bnumber":
            bookNumber = value
        elif attrib=="bname":
            bookName = value
        elif attrib=="bsname":
            bookShortName = value
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

    if bookNumber:
        try:
            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except (KeyError, ValueError): # the number could not be resolved
            logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                    .format( bookNumber, bookName, bookShortName ) )
    # Fall back to the name whenever the number was absent or unusable,
    #   rather than dropping a book that could still be identified
    if BBB is None and bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'Haggai XML Bible Book object'
    thisBook.objectTypeString = 'Haggai'

    for element in book:
        if element.tag == HaggaiXMLBible.captionTag:
            sublocation = "caption in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'jhl6' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'jk21' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'kjh6' )
            thisBook.addLine( 'mt', element.text )
        elif element.tag == HaggaiXMLBible.chapterTag:
            sublocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )
        else:
            logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, element.tag ) )

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def __validateAndExtractBook(self, book):
    """
    Validate one OpenSong XML book element and extract its chapters.

    The book name is taken from the "n" attribute; every other attribute
    is logged as unprocessed. When the name resolves to a book code
    (via self.genericBOS.getBBB), a BibleBook is created, filled from the
    chapter subelements and passed to self.saveBook(). An error is logged
    only when the name is missing or is not recognised.
    """
    if Globals.verbosityLevel > 3:
        print(_("Validating OpenSong XML book..."))

    # Process the book attributes first
    BBB = bookName = None
    for attrib, value in book.items():
        if attrib == "n":
            bookName = value
        else:
            logging.warning(
                "Unprocessed '{}' attribute ({}) in book element".format(
                    attrib, value))

    if bookName:
        BBB = self.genericBOS.getBBB(bookName)
        if BBB:
            if Globals.verbosityLevel > 2:
                print(_("Validating {} {}...").format(BBB, bookName))
            thisBook = BibleBook(self.name, BBB)
            thisBook.objectNameString = "OpenSong XML Bible Book object"
            thisBook.objectTypeString = "OpenSong"
            for element in book:
                if element.tag == OpenSongXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    Globals.checkXMLNoText(element, sublocation, 'j3jd')
                    Globals.checkXMLNoTail(element, sublocation, 'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "Expected to find '{}' but got '{}'".format(
                            OpenSongXMLBible.chapterTag, element.tag))
            if Globals.verbosityLevel > 2:
                print(" Saving {} into results...".format(BBB))
            self.saveBook(thisBook)
        else:  # no BBB -- only complain when the lookup really failed
            logging.error(
                _("OpenSong load doesn't recognize book name: '{}'").format(
                    bookName))
    else:  # no bookName -- only complain when the attribute was missing
        logging.error(_("OpenSong load can't find a book name"))
def __validateAndExtractBook( self, book, bookNumber ):
    """
    Validate one VerseView XML book element and extract its chapters.

    The "n" attribute supplies the book name (other attributes are logged
    as unprocessed). The name is resolved through self.genericBOS, with a
    second attempt after removing accents, and the result is then compared
    with the code obtained from bookNumber: on a mismatch the number-based
    code is used. A resolved book is filled from its chapter subelements
    and handed to self.stashBook().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( _("Validating XML book…") )

    # Process the book attributes first
    bookName = None
    for attrName,attrValue in book.items():
        if attrName != "n":
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrName, attrValue ) )
            continue
        bookName = attrValue

    # Name-based lookup (retried without accents if it finds nothing)
    BBB = None
    if bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None:
            unaccentedName = BibleOrgSysGlobals.removeAccents( bookName )
            if unaccentedName != bookName:
                BBB = self.genericBOS.getBBBFromText( unaccentedName )

    # Just double check using the book number
    BBBFromNumber = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
    if BBBFromNumber != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBBFromNumber, BBB ) )
        BBB = BBBFromNumber

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Validating {} {}…").format( BBB, bookName ) )
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = 'VerseView XML Bible Book object'
    bookObject.objectTypeString = 'VerseView'

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != VerseViewXMLBible.chapterTag:
            logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, child.tag ) )
            continue
        sublocation = "chapter in {}".format( BBB )
        BibleOrgSysGlobals.checkXMLNoText( child, sublocation, 'j3jd' )
        BibleOrgSysGlobals.checkXMLNoTail( child, sublocation, 'al1d' )
        self.__validateAndExtractChapter( BBB, bookObject, child )

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Saving {} into results…".format( BBB ) )
    self.stashBook( bookObject )
def __validateAndExtractBook( self, book ):
    """
    Validate one OpenSong XML book element and extract its chapters.

    The book name is taken from the "n" attribute; every other attribute
    is logged as unprocessed. The name is first looked up through
    self.genericBOS and, if that finds nothing, through the module-level
    BibleBooksNames data (loaded on first use). A recognised book gets
    id/h/mt1 header lines, is filled from its chapter subelements and is
    passed to self.stashBook(); otherwise an error is logged.
    """
    global BibleBooksNames

    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( _("Validating OpenSong XML book…") )

    # Pick the book name out of the element attributes
    bookName = None
    for attrName,attrValue in book.items():
        if attrName != "n":
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrName, attrValue ) )
            continue
        bookName = attrValue

    if not bookName: # no bookName
        logging.error( _("OpenSong load can't find a book name") )
        return

    BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
    if not BBB: # wasn't English
        if BibleBooksNames is None:
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        BBB = BibleBooksNames.getBBBFromText( bookName ) # Try non-English booknames

    if not BBB: # no BBB
        logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Validating {} {}…").format( BBB, bookName ) )
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = 'OpenSong XML Bible Book object'
    bookObject.objectTypeString = 'OpenSong'

    # Header lines for the new book
    usfmCode = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
    bookObject.addLine( 'id', '{} imported by {}'.format( usfmCode.upper(), ProgNameVersion ) )
    bookObject.addLine( 'h', bookName )
    bookObject.addLine( 'mt1', bookName )

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, child.tag ) )
            continue
        sublocation = "chapter in {}".format( BBB )
        BibleOrgSysGlobals.checkXMLNoText( child, sublocation, 'j3jd' )
        BibleOrgSysGlobals.checkXMLNoTail( child, sublocation, 'al1d' )
        self.__validateAndExtractChapter( BBB, bookObject, child )

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Saving {} into results…".format( BBB ) )
    self.stashBook( bookObject )
def __validateAndExtractBook( self, book ):
    """
    Validate one Zefania XML book element and extract its chapters.

    Reads the "bnumber", "bname" and "bsname" attributes (anything else is
    logged as unprocessed). The book code is deduced from the book number
    first; if there is no number, or the number lookup fails, the book
    name is tried via self.genericBOS.getBBB. When a code is found, a
    BibleBook is created, filled from the chapter subelements and passed
    to self.saveBook(). If no code can be found the book element is
    skipped.
    """
    if Globals.verbosityLevel > 3:
        print( _("Validating XML book...") )

    # Process the book attributes first
    BBB = bookName = bookShortName = bookNumber = None
    for attrib,value in book.items():
        if attrib=="bnumber":
            bookNumber = value
        elif attrib=="bname":
            bookName = value
        elif attrib=="bsname":
            bookShortName = value
        else:
            logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value ) )

    if bookNumber:
        try:
            BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except (KeyError, ValueError): # the number could not be resolved
            logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                    .format( bookNumber, bookName, bookShortName ) )
    # Fall back to the name whenever the number was absent or unusable,
    #   rather than dropping a book that could still be identified
    if BBB is None and bookName:
        BBB = self.genericBOS.getBBB( bookName )

    if not BBB:
        return

    if Globals.verbosityLevel > 2:
        print( _("Validating {} {}...").format( BBB, bookName ) )
    thisBook = BibleBook( self.name, BBB )
    thisBook.objectNameString = "Zefania XML Bible Book object"
    thisBook.objectTypeString = "Zefania"

    for element in book:
        if element.tag == ZefaniaXMLBible.chapterTag:
            sublocation = "chapter in {}".format( BBB )
            Globals.checkXMLNoText( element, sublocation, 'j3jd' )
            Globals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )
        else:
            logging.error( "Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.chapterTag, element.tag ) )

    if Globals.verbosityLevel > 2:
        print( " Saving {} into results...".format( BBB ) )
    self.saveBook( thisBook )
def __validateAndExtractBook( self, book ):
    """
    Validate one Zefania XML book element and extract its chapters.

    Collects the "bnumber", "bname" and "bsname" attributes, logging an
    error for any other attribute. The book code is looked up from the
    book number (a KeyError or ValueError there is logged as critical) and,
    if still unknown, from the book name via self.genericBOS. A resolved
    book is filled from its chapter subelements and handed to
    self.stashBook().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( _("Validating XML book…") )

    # Gather the recognised attributes into one table
    known = { "bnumber":None, "bname":None, "bsname":None }
    for attrName,attrValue in book.items():
        if attrName in known:
            known[attrName] = attrValue
        else:
            logging.error( "Unprocessed {!r} attribute ({}) in book element".format( attrName, attrValue ) )
    bookNumber, bookName, bookShortName = known["bnumber"], known["bname"], known["bsname"]

    # Prefer the number; use the name when the number gave nothing
    BBB = None
    if bookNumber:
        try:
            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except (KeyError, ValueError):
            logging.critical( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                    .format( bookNumber, bookName, bookShortName ) )
    if BBB is None and bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Validating {} {}…").format( BBB, bookName ) )
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = 'Zefania XML Bible Book object'
    bookObject.objectTypeString = 'Zefania'

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != ZefaniaXMLBible.chapterTag:
            logging.error( "Expected to find {!r} but got {!r}".format( ZefaniaXMLBible.chapterTag, child.tag ) )
            continue
        sublocation = "chapter in {}".format( BBB )
        BibleOrgSysGlobals.checkXMLNoText( child, sublocation, 'j3jd' )
        BibleOrgSysGlobals.checkXMLNoTail( child, sublocation, 'al1d' )
        self.__validateAndExtractChapter( BBB, bookObject, child )

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Saving {} into results…".format( BBB ) )
    self.stashBook( bookObject )
def __validateAndExtractBook( self, book ):
    """
    Validate one OpenSong XML book element and extract its chapters.

    The book name is taken from the "n" attribute; every other attribute
    is logged as unprocessed. When the name resolves to a book code
    (via self.genericBOS.getBBB), a BibleBook is created, given
    id/h/mt1 header lines, filled from the chapter subelements and passed
    to self.saveBook(). Otherwise an error is logged and nothing is saved.
    """
    if Globals.verbosityLevel > 3:
        print( _("Validating OpenSong XML book...") )

    # Pick the book name out of the element attributes
    bookName = None
    for attrName,attrValue in book.items():
        if attrName != "n":
            logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrName, attrValue ) )
            continue
        bookName = attrValue

    if not bookName: # no bookName
        logging.error( _("OpenSong load can't find a book name") )
        return

    BBB = self.genericBOS.getBBB( bookName )
    if not BBB: # no BBB
        logging.error( _("OpenSong load doesn't recognize book name: '{}'").format( bookName ) )
        return

    if Globals.verbosityLevel > 2:
        print( _("Validating {} {}...").format( BBB, bookName ) )
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = "OpenSong XML Bible Book object"
    bookObject.objectTypeString = "OpenSong"

    # Header lines for the new book
    usfmCode = Globals.BibleBooksCodes.getUSFMAbbreviation( BBB )
    bookObject.appendLine( 'id', '{} imported by {}'.format( usfmCode.upper(), ProgNameVersion ) )
    bookObject.appendLine( 'h', bookName )
    bookObject.appendLine( 'mt1', bookName )

    # Only chapter subelements are expected directly under a book
    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error( "Expected to find '{}' but got '{}'".format( OpenSongXMLBible.chapterTag, child.tag ) )
            continue
        sublocation = "chapter in {}".format( BBB )
        Globals.checkXMLNoText( child, sublocation, 'j3jd' )
        Globals.checkXMLNoTail( child, sublocation, 'al1d' )
        self.__validateAndExtractChapter( BBB, bookObject, child )

    if Globals.verbosityLevel > 2:
        print( " Saving {} into results...".format( BBB ) )
    self.saveBook( bookObject )
def load( self ):
    """
    Load a single MySword source file and load book elements.

    Opens self.sourceFilepath with sqlite3, copies the first row of the
    Details table into self.settingsDict, then steps through every
    book/chapter/verse reference of the expected testament(s), fetching
    each verse from the Bible table and passing it to handleLine().
    A book is given to self.saveBook() only if at least one non-blank
    verse line was found for it. The database connection is always closed
    before returning, including when loading raises.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Loading {}...").format( self.sourceFilepath ) )

    # Complain (but carry on) if the filename doesn't look like a MySword Bible
    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
    elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
        logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

    connection = sqlite3.connect( self.sourceFilepath )
    try: # so that the connection gets closed whatever happens below
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

        # Work out the first book and how many books to step through
        # NOTE(review): if neither OT nor NT is set, BBB and booksExpected are never assigned
        #   and the BibleBook() call below fails -- confirm whether that case can occur
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            BBB, booksExpected = 'GEN', 66
        elif self.settingsDict['OT']:
            BBB, booksExpected = 'GEN', 39
        elif self.settingsDict['NT']:
            BBB, booksExpected = 'MAT', 27

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = { 'haveParagraph':False }
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except TypeError: # This reference is missing (row is None)
                line = None

            if line is None:
                logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( line, str ):
                if 'encryption' in self.settingsDict:
                    logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                    break # give up on the whole file
                else:
                    logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
            elif not line:
                logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else:
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (We just ignore these for now)
                line = line.rstrip( '\r\n' )
                if '\r' in line or '\n' in line: # (in the middle)
                    logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            # NOTE(review): handleLine is called for every reference, including missing (None)
            #   and undecodable ones -- confirm that handleLine tolerates those
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

            # Step on to the next verse, chapter, or book
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected:
                        break
                    BBB = BOS.getNextBookCode( BBB )

                    # Create the next book
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                else: # next chapter only
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False
        cursor.close()
    finally:
        connection.close()
    self.doPostLoadProcessing()
def loadBook( self, BBB ):
    """
    Load the requested book out of the SQLite3 database.

    Returns immediately if BBB is already in self.books or has already
    been attempted (per self.triedLoadingBook). Otherwise every
    chapter/verse reference of the book is fetched through self.cursor and
    passed to handleLine(); the book is given to self.stashBook() only if
    at least one non-blank verse line was found.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("loadBook( {} )").format( BBB ) )
    assert self.preloadDone

    if BBB in self.books:
        if BibleOrgSysGlobals.debugFlag:
            print( " {} is already loaded -- returning".format( BBB ) )
        return # Already loaded
    if BBB in self.triedLoadingBook:
        logging.warning( "We had already tried loading MySwordBible {} for {}".format( BBB, self.name ) )
        return # We've already attempted to load this book
    self.triedLoadingBook[BBB] = True
    self.bookNeedsReloading[BBB] = False
    if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
        print( _("MySwordBible: Loading {} from {}…").format( BBB, self.sourceFilepath ) )

    # Create the book
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = 'MySword Bible Book object'
    bookObject.objectTypeString = 'MySword'

    versesPerChapter = self.BOS.getNumVersesList( BBB )
    chapterCount, verseCount = len(versesPerChapter), versesPerChapter[0]
    bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
    C = V = 1

    lineState = { 'haveParagraph':False }
    haveLines = False
    while True:
        self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (bookNumber,C,V) )
        try:
            record = self.cursor.fetchone()
            verseText = record[0]
        except TypeError: # This reference is missing (record is None)
            verseText = None

        if verseText is None:
            logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
        elif not isinstance( verseText, str ):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, verseText ) )
                break
            else:
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, verseText, self.suppliedMetadata['MySword'] ) )
        elif not verseText:
            logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
        else:
            haveLines = True

            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            verseText = verseText.rstrip( '\r\n' )
            if '\r' in verseText or '\n' in verseText: # (in the middle)
                logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
            verseText = verseText.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

        handleLine( self.name, BBB, C, V, verseText, bookObject, lineState )

        # Step on to the next verse or chapter, or finish the book
        V += 1
        if V > verseCount:
            C += 1
            if C > chapterCount: # Save this book now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print( " MySword saving", BBB )
                    self.stashBook( bookObject )
                break
            # next chapter only
            verseCount = versesPerChapter[C-1]
            V = 1

        if lineState['haveParagraph']:
            bookObject.addLine( 'p', '' )
            lineState['haveParagraph'] = False
def load( self ):
    """
    Load all the books out of the SQLite3 database.

    Uses the OT/NT flags in self.suppliedMetadata['MySword'] to choose the
    first book and the number of books to step through. Every
    chapter/verse reference is fetched through self.cursor and passed to
    handleLine(); a book is given to self.stashBook() only if at least one
    non-blank verse line was found for it. Finally the cursor is closed,
    the supplied metadata is applied and post-load processing is run.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("load()") )
    assert self.preloadDone

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Loading {}…").format( self.sourceFilepath ) )

    # Work out the first book and how many books to step through
    if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
        BBB, booksExpected = 'GEN', 66
    elif self.suppliedMetadata['MySword']['OT']:
        BBB, booksExpected = 'GEN', 39
    elif self.suppliedMetadata['MySword']['NT']:
        BBB, booksExpected = 'MAT', 27

    # Create the first book
    bookObject = BibleBook( self, BBB )
    bookObject.objectNameString = 'MySword Bible Book object'
    bookObject.objectTypeString = 'MySword'

    versesPerChapter = self.BOS.getNumVersesList( BBB )
    chapterCount, verseCount = len(versesPerChapter), versesPerChapter[0]
    bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
    C = V = 1

    bookCount = 0
    lineState = { 'haveParagraph':False }
    haveLines = False
    while True:
        self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (bookNumber,C,V) )
        try:
            record = self.cursor.fetchone()
            verseText = record[0]
        except TypeError: # This reference is missing (record is None)
            verseText = None

        if verseText is None:
            logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
        elif not isinstance( verseText, str ):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, verseText ) )
                break
            else:
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, verseText, self.suppliedMetadata['MySword'] ) )
        elif not verseText:
            logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
        else:
            haveLines = True

            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            verseText = verseText.rstrip( '\r\n' )
            if '\r' in verseText or '\n' in verseText: # (in the middle)
                logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
            verseText = verseText.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

        handleLine( self.name, BBB, C, V, verseText, bookObject, lineState )

        # Step on to the next verse, chapter, or book
        V += 1
        if V > verseCount:
            C += 1
            if C <= chapterCount: # next chapter only
                verseCount = versesPerChapter[C-1]
                V = 1
            else: # Save this book now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print( " MySword saving", BBB, bookCount+1 )
                    self.stashBook( bookObject )
                bookCount += 1 # Not the number saved but the number we attempted to process
                if bookCount >= booksExpected:
                    break
                BBB = self.BOS.getNextBookCode( BBB )

                # Create the next book
                bookObject = BibleBook( self, BBB )
                bookObject.objectNameString = 'MySword Bible Book object'
                bookObject.objectTypeString = 'MySword'
                haveLines = False

                versesPerChapter = self.BOS.getNumVersesList( BBB )
                chapterCount, verseCount = len(versesPerChapter), versesPerChapter[0]
                bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                C = V = 1

        if lineState['haveParagraph']:
            bookObject.addLine( 'p', '' )
            lineState['haveParagraph'] = False

    self.cursor.close()
    self.applySuppliedMetadata( 'MySword' ) # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def loadBook(self, BBB):
    """
    Load the requested book out of the SQLite3 database.

    Returns immediately if BBB is already in self.books or has already
    been attempted (per self.triedLoadingBook). Otherwise every
    chapter/verse reference of the book is fetched through self.cursor and
    passed to handleRTFLine(); the book is given to self.stashBook() only
    if at least one non-blank verse line was found.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(exp("loadBook( {} )").format(BBB))
    assert self.preloadDone

    if BBB in self.books:
        if BibleOrgSysGlobals.debugFlag:
            print(" {} is already loaded -- returning".format(BBB))
        return  # Already loaded
    if BBB in self.triedLoadingBook:
        logging.warning(
            "We had already tried loading MySwordBible {} for {}".format(
                BBB, self.name))
        return  # We've already attempted to load this book
    self.triedLoadingBook[BBB] = True
    self.bookNeedsReloading[BBB] = False
    if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
        print(
            _("MySwordBible: Loading {} from {}…").format(
                BBB, self.sourceFilepath))

    # Create the book
    bookObject = BibleBook(self, BBB)
    bookObject.objectNameString = 'MySword Bible Book object'
    bookObject.objectTypeString = 'MySword'

    versesPerChapter = self.BibleOrganisationalSystem.getNumVersesList(BBB)
    chapterCount, verseCount = len(versesPerChapter), versesPerChapter[0]
    bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
    C = V = 1

    lineState = {'haveParagraph': False}
    haveLines = False
    while True:
        self.cursor.execute(
            'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
            (bookNumber, C, V))
        try:
            record = self.cursor.fetchone()
            verseText = record[0]
        except TypeError:  # This reference is missing (record is None)
            verseText = None

        if verseText is None:
            logging.warning(
                "MySwordBible.load: Have missing verse line at {} {}:{}".
                format(BBB, C, V))
        elif not isinstance(verseText, str):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical(
                    "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                    .format(BBB, C, V, verseText))
                break
            else:
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                    .format(BBB, C, V, verseText,
                            self.suppliedMetadata['MySword']))
        elif not verseText:
            logging.warning(
                "MySwordBible.load: Found blank verse line at {} {}:{}"
                .format(BBB, C, V))
        else:
            haveLines = True

            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            verseText = verseText.rstrip('\r\n')
            if '\r' in verseText or '\n' in verseText:  # (in the middle)
                logging.warning(
                    "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                    .format(BBB, C, V))
            verseText = verseText.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

        handleRTFLine(self.name, BBB, C, V, verseText, bookObject, lineState)

        # Step on to the next verse or chapter, or finish the book
        V += 1
        if V > verseCount:
            C += 1
            if C > chapterCount:  # Save this book now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print(" MySword saving", BBB)
                    self.stashBook(bookObject)
                break
            # next chapter only
            verseCount = versesPerChapter[C - 1]
            V = 1

        if lineState['haveParagraph']:
            bookObject.addLine('p', '')
            lineState['haveParagraph'] = False
def load(self):
    """
    Load a single source file and load book elements.

    The file is read line by line; each non-blank line is split on tabs
    and the first field selects the record type ('info', 'book_name',
    'verse', 'pericope', 'parallel', 'xref' or 'footnote').  Verses are
    first collected per book, then each book is built as a BibleBook with
    headings, footnotes and cross-references merged in, and handed to
    self.saveBook().  Finally self.doPostLoadProcessing() is called.

    Returns None.
    """
    if Globals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    def decodeVerse(encodedVerseString):
        """
        Decodes the verse which has @ format codes.

        Paragraph-level codes become markers prefixed with a DOUBLE
        backslash and character-level codes become markers prefixed with a
        SINGLE backslash, so the two kinds can be told apart later.
        """
        verseString = encodedVerseString
        # A leading '@@' simply means that encoding follows
        #   (checked twice, so up to two such prefixes are removed)
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        # Paragraph markers (marked now with double backslash)
        verseString = verseString.replace('@^', '\\\\p ')
        verseString = verseString.replace('@0', '\\\\m ')
        # '@4' previously produced a single-backslash marker, which the
        #   paragraph split below would not have recognised.
        verseString = verseString.replace('@1', '\\\\q1 ') \
                                 .replace('@2', '\\\\q2 ') \
                                 .replace('@3', '\\\\q3 ') \
                                 .replace('@4', '\\\\q4 ')
        verseString = verseString.replace('@8', '\\\\m ')
        # Character markers (marked now with single backslash)
        verseString = verseString.replace('@6', '\\wj ').replace('@5', '\\wj*')
        verseString = verseString.replace('@9', '\\add ').replace('@7', '\\add*')  # or \\i ???
        # Footnote/cross-reference callers become \ffN and \xxN placeholders
        verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
        verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
        assert '@' not in verseString
        return verseString
    # end of decodeVerse

    # Read all the lines into bookDict
    lineCount = 0
    bookNameDict, bookDict = OrderedDict(), OrderedDict()
    footnoteDict, xrefDict, headingDict = {}, {}, {}
    BBB = bookNumberString = chapterNumberString = verseNumberString = ''
    lastBBB = None
    bookLines = None
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Remove trailing newline character
            if not line:
                continue  # Just discard blank lines
            bits = line.split('\t')
            lineType = bits[0]

            if lineType == 'info':
                assert len(bits) == 3
                if bits[1] == 'shortName':
                    shortName = bits[2]
                    self.name = shortName
                elif bits[1] == 'longName':
                    longName = bits[2]
                elif bits[1] == 'description':
                    description = bits[2]
                elif bits[1] == 'locale':
                    locale = bits[2]
                    assert 2 <= len(locale) <= 3
                    if locale == 'in':
                        locale = 'id'  # Fix a quirk in the locale encoding
                else:
                    # (This used to reference an undefined name and so
                    #   raised NameError instead of logging.)
                    logging.warning(
                        _("YETBible: unknown {} info field in {} {} {}:{}")
                        .format(repr(bits[1]), BBB, bookNumberString,
                                chapterNumberString, verseNumberString))

            elif lineType == 'book_name':
                assert 3 <= len(bits) <= 4
                thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bits[1])
                if len(bits) == 3:
                    bookNameDict[thisBBB] = bits[2], ''
                elif len(bits) == 4:
                    bookNameDict[thisBBB] = bits[2], bits[3]

            elif lineType == 'verse':
                assert len(bits) == 5
                bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                if Globals.debugFlag:
                    assert bookNumberString.isdigit()
                    assert chapterNumberString.isdigit()
                    assert verseNumberString.isdigit()
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumberString)
                if BBB != lastBBB:  # We have a new book
                    if lastBBB is not None:  # We have a completed book to save
                        bookDict[lastBBB] = bookLines
                    assert BBB in bookNameDict
                    bookLines = OrderedDict()  # Keys are (C,V) strings
                # Just store it for now
                bookLines[(chapterNumberString, verseNumberString)] = decodeVerse(encodedVerseString)
                lastBBB = BBB

            elif lineType == 'pericope':
                assert len(bits) == 5
                bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                if Globals.debugFlag:
                    assert bookNumberString.isdigit()
                    assert chapterNumberString.isdigit()
                    assert verseNumberString.isdigit()
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumberString)
                headingString = encodedHeadingString.replace('@9', '\\it ').replace('@7', '\\it*')
                assert '@' not in headingString
                # Start with a blank refList (filled by 'parallel' lines)
                headingDict[(BBB, chapterNumberString, verseNumberString)] = headingString, []

            elif lineType == 'parallel':  # These lines optionally follow pericope lines
                assert len(bits) == 2
                # The list is mutated in place, so no need to store it back
                headingDict[(BBB, chapterNumberString, verseNumberString)][1].append(bits[1])

            elif lineType == 'xref':
                assert len(bits) == 6
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                if Globals.debugFlag:
                    assert bookNumberString.isdigit()
                    assert chapterNumberString.isdigit()
                    assert verseNumberString.isdigit()
                    assert indexNumberString.isdigit()
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace('@7', '\\it*')
                noteString = re.sub(r'@<ta(.+?)@>', r'', noteString)  # Get rid of these encoded BCV references for now
                noteString = re.sub(r'@<to(.+?)@>', r'', noteString)  # Get rid of these OSIS BCV references for now
                noteString = noteString.replace('@/', '')
                assert '@' not in noteString
                xrefDict[(BBB, chapterNumberString, verseNumberString, indexNumberString)] = noteString

            elif lineType == 'footnote':
                assert len(bits) == 6
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                if Globals.debugFlag:
                    assert bookNumberString.isdigit()
                    assert chapterNumberString.isdigit()
                    assert verseNumberString.isdigit()
                    assert indexNumberString.isdigit()
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace('@7', '\\it*')
                assert '@' not in noteString
                footnoteDict[(BBB, chapterNumberString, verseNumberString, indexNumberString)] = noteString

            else:
                print("YETBible: Unknown line type", self.givenName, BBB,
                      bookNumberString, chapterNumberString,
                      verseNumberString, len(bits), bits)
                # NOTE(review): 'halt' is not defined in this block --
                #   presumably a deliberate crash (NameError); confirm.
                halt

    if lastBBB is not None:
        bookDict[lastBBB] = bookLines  # Save the last book

    # Now process the books
    for BBB, bkData in bookDict.items():
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "YET Bible Book object"
        thisBook.objectTypeString = "YET"
        lastChapterNumberString = None
        for (chapterNumberString, verseNumberString), verseString in bkData.items():
            # Insert headings (can only occur before verses)
            headingKey = (BBB, chapterNumberString, verseNumberString)
            if headingKey in headingDict:
                heading, refList = headingDict[headingKey]
                thisBook.appendLine('s', heading)
                if refList:
                    thisBook.appendLine('r', '(' + '; '.join(refList) + ')')

            # Insert footnotes and cross-references
            #   (the character after the placeholder is the single-digit caller)
            while '\\ff' in verseString:
                fIx = verseString.index('\\ff')
                caller = verseString[fIx + 3]
                assert caller.isdigit()
                note = footnoteDict[(BBB, chapterNumberString, verseNumberString, caller)]
                verseString = verseString[:fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[fIx + 4:]
            while '\\xx' in verseString:
                fIx = verseString.index('\\xx')
                caller = verseString[fIx + 3]
                assert caller.isdigit()
                note = xrefDict[(BBB, chapterNumberString, verseNumberString, caller)]
                verseString = verseString[:fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[fIx + 4:]

            # Save the Bible data fields
            if chapterNumberString != lastChapterNumberString:
                thisBook.appendLine('c', chapterNumberString)
                lastChapterNumberString = chapterNumberString
            if verseString.startswith('\\\\'):  # It's an initial paragraph marker
                # Marker is one or two characters followed by a space
                if verseString[3] == ' ':
                    marker, verseString = verseString[2], verseString[4:]
                elif verseString[4] == ' ':
                    marker, verseString = verseString[2:4], verseString[5:]
                else:
                    halt
                thisBook.appendLine(marker, '')
            assert not verseString.startswith('\\\\')
            # Split on paragraph markers (but not character markers)
            for j, bit in enumerate(verseString.split('\\\\')):
                if j == 0:
                    # Only the text before the first paragraph marker belongs
                    #   on the verse line (the remainder is emitted below).
                    thisBook.appendLine('v', verseNumberString + ' ' + bit.rstrip())
                else:
                    if bit[1] == ' ':
                        marker, bit = bit[0], bit[2:]
                    elif bit[2] == ' ':
                        marker, bit = bit[0:2], bit[3:]
                    else:
                        halt
                    thisBook.appendLine(marker, bit.rstrip())
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    Opens self.sourceFilepath as a SQLite3 database, copies the first row
    of the 'Details' table into self.settingsDict, then walks every
    expected book/chapter/verse reference (using the GENERIC-KJV-66-ENG
    organizational system), fetching each verse from the 'Bible' table and
    passing it to handleLine().  Each book with at least one non-blank
    verse is given to self.saveBook().  Finally
    self.doPostLoadProcessing() is called.

    Returns None.  If the module declares neither an OT nor an NT, a
    critical message is logged and nothing is loaded.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical("{} doesn't appear to be a MySword file".format(
            self.sourceFilename))
    elif not self.sourceFilename.upper().endswith(
            BibleFilenameEndingsToAccept[0]):
        logging.critical(
            "{} doesn't appear to be a MySword Bible file".format(
                self.sourceFilename))

    connection = sqlite3.connect(self.sourceFilepath)
    try:
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute('select * from Details')
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        if 'Description' in self.settingsDict and len(
                self.settingsDict['Description']) < 40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, self.settingsDict['encryption']))

        # Decide where to start and how many books to attempt
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            BBB, booksExpected = 'GEN', 66
        elif self.settingsDict['OT']:
            BBB, booksExpected = 'GEN', 39
        elif self.settingsDict['NT']:
            BBB, booksExpected = 'MAT', 27
        else:
            # Previously fell through and crashed with an unbound name
            logging.critical(
                "{} declares neither OT nor NT -- nothing to load".format(
                    self.sourceFilename))
            return

        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        def startBook(bookCode):
            """Create an empty book plus its verse counts and reference number."""
            newBook = BibleBook(self, bookCode)
            newBook.objectNameString = "MySword Bible Book object"
            newBook.objectTypeString = "MySword"
            return (newBook, BOS.getNumVersesList(bookCode),
                    BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(bookCode))

        # Create the first book
        thisBook, verseList, nBBB = startBook(BBB)
        numC, numV = len(verseList), verseList[0]
        C = V = 1

        bookCount = 0
        # State shared with handleLine() between verses
        ourGlobals = {'haveParagraph': False}
        haveLines = False
        while True:
            cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            # fetchone() gives None when the reference is missing; test for
            #   that explicitly rather than catching TypeError.
            row = cursor.fetchone()
            line = None if row is None else row[0]

            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            elif not isinstance(line, str):
                if 'encryption' in self.settingsDict:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                        .format(BBB, C, V, repr(line)))
                    break  # Give up on the rest of the module
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                    .format(BBB, C, V, repr(line), self.settingsDict))
                # NOTE(review): undecodable (non-str) data is no longer handed
                #   to handleLine() -- presumably it expects str; confirm.
            else:
                if not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True
                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    line = line.rstrip('\r\n')
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                        line = line.replace('\r\n', ' ').replace(
                            '\r', ' ').replace('\n', ' ')
                handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

            # Step on to the next expected reference
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("Saving", BBB, bookCount + 1)
                        self.saveBook(thisBook)
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected:
                        break
                    BBB = BOS.getNextBookCode(BBB)
                    # Create the next book
                    thisBook, verseList, nBBB = startBook(BBB)
                    haveLines = False
                    numC, numV = len(verseList), verseList[0]
                    C = V = 1
                else:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1

            # haveParagraph is presumably set by handleLine() -- it is only
            #   ever cleared in this method.
            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
        cursor.close()
    finally:
        # The connection is local to this method, so release it here
        #   (previously only the cursor was closed).
        connection.close()
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    The file is processed in three sections, tracked by 'status':
        0 = version details (until a '*Chapter' line),
        1 = book name details (until a '*Context' line),
        2 = verse data.
    Fields within a line are separated by '|'.  Blank lines and lines
    starting with '#' are discarded.  Each completed book is passed to
    self.stashBook(), then self.doPostLoadProcessing() is called.

    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    status = 0  # 1 = getting chapters, 2 = getting verse data
    lineCount = 0
    BBB = lastBBB = None
    thisBook = None
    lastChapterNumberString = None
    bookDetails = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if lineCount == 1:
                if line[0] == chr(65279):  # U+FEFF
                    logging.info(
                        "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[1:]  # Remove the decoded Unicode Byte Order Marker (BOM)
                elif line[:3] == '\xef\xbb\xbf':  # 0xEF,0xBB,0xBF
                    # A UTF-8 BOM read through a single-byte encoding shows up
                    #   as these three characters.  (The comparison used to be
                    #   against an empty string and so could never match.)
                    logging.info(
                        "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[3:]  # Remove the UTF-8 Unicode Byte Order Marker (BOM)
            if line and line[-1] == '\n':
                line = line[:-1]  # Remove trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == '#':
                continue  # Just discard comment lines

            if lineCount == 1:
                if line != '*Bible':
                    logging.warning(
                        "Unknown DrupalBible first line: {}".format(repr(line)))

            elif status == 0:
                if line == '*Chapter':
                    status = 1
                else:  # Get the version name details
                    shortName, fullName, language = line.split('|')
                    self.name = fullName

            elif status == 1:
                if line == '*Context':
                    status = 2
                else:  # Get the book name details
                    bookCode, bookFullName, bookShortName, numChapters = line.split('|')
                    assert bookShortName == bookCode
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                    # Result can be string or list of strings (best guess first)
                    BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]
                    bookDetails[BBB] = bookFullName, bookShortName, numChapters

            elif status == 2:  # Get the verse text
                bookCode, chapterNumberString, verseNumberString, lineMark, verseText = line.split('|')
                if lineMark:
                    print(repr(lineMark))
                    # NOTE(review): 'halt' is not defined in this block --
                    #   presumably a deliberate crash (NameError); confirm.
                    halt
                BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                # Result can be string or list of strings (best guess first)
                BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]
                if BBB != lastBBB:  # We've started a new book
                    if lastBBB is not None:
                        self.stashBook(thisBook)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'DrupalBible Bible Book object'
                    thisBook.objectTypeString = 'DrupalBible'
                    lastChapterNumberString = None
                    lastBBB = BBB
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                verseText = verseText.replace('<', '\\it ').replace('>', '\\it*')
                thisBook.addLine('v', verseNumberString + ' ' + verseText)

            else:
                halt

    # Save the final book (there isn't one if the file had no verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    The first non-blank line is used to identify the verse-per-line
    layout:
        type 1: 'BookCode C:V text' (space separated),
        type 2: an eight-digit BBCCCVVV reference, then the text,
        type 3: as type 2 (tab separated) but preceded by '# field: value'
                header lines, which are collected as metadata.
    Verses are added to BibleBook objects, each of which is passed to
    self.stashBook() once the next book starts (or the file ends).  Any
    collected header metadata is stored in self.suppliedMetadata['VPL']
    and applied, then self.doPostLoadProcessing() is called.

    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganizationalSystem('GENERIC-KJV-81-ENG')
    if BOSx is None:
        BOSx = BibleOrganizationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Type 3 header line prefixes and the settings key each one fills
    headerFields = (
        ('# language_name:', 'LanguageName'),
        ('# closest ISO 639-3:', 'ISOLanguageCode'),
        ('# year_short:', 'Year.short'),
        ('# year_long:', 'Year.long'),
        ('# title:', 'WorkTitle'),
        ('# URL:', 'URL'),
        ('# copyright_short:', 'Copyright.short'),
        ('# copyright_long:', 'Copyright.long'),
    )
    # Type 1 book codes that are set directly rather than guessed
    specialBookCodes = {'Le': 'LEV', 'Jud': 'JDG', 'So': 'SNG', 'La': 'LAM'}
    # Type 2/3 book numbers beyond the first 66
    bnDict = {67: 'TOB', 68: 'JDT', 69: 'ESG', 70: 'WIS', 71: 'SIR',
              72: 'BAR', 73: 'LJE', 74: 'PAZ', 75: 'SUS', 76: 'BEL',
              77: 'MA1', 78: 'MA2', 79: 'MA3', 80: 'MA4', 81: 'ES1',
              82: 'ES2', 83: 'MAN', 84: 'PS2', 85: 'PSS', 86: 'ODE'}

    def stripLeadingZeroes(digits):
        """Remove leading zeroes but always leave the final character."""
        return digits.lstrip('0') or digits[-1:]

    lineCount = 0
    vplType = bookCode = BBB = None
    typeDetectionDone = False
    # Pre-set so that a malformed first line can still be reported
    chapterNumberString = verseNumberString = vText = ''
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    numChapters = 0
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Remove trailing newline character
            if lineCount == 1 and line[:1] == chr(65279) \
                    and self.encoding.lower() == 'utf-8':  # U+FEFF or \ufeff
                logging.info("      VPLBible.load: Detected Unicode Byte Order Marker (BOM)")
                line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
            if not line:
                continue  # Just discard blank lines

            if not typeDetectionDone:
                # Try to identify the VPL type from the first non-blank line
                #   (previously only physical line 1 was ever examined, so a
                #   leading blank line prevented detection altogether).
                typeDetectionDone = True
                match = re.search(r'^(\w{2,5}?)\s(\d{1,3})[:\.](\d{1,3})\s', line)
                if match:
                    vplType = 1
                else:
                    match = re.search(r'^(\d{8})\s', line)
                    if match:
                        vplType = 2
                    else:
                        match = re.search(r'^# language_name:\s', line)
                        if match:
                            vplType = 3
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print("First line got type #{} {!r} match from {!r}".format(
                            vplType, match.group(0), line))
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print("VPLBible.load: (unexpected) first line was {!r} in {}".format(
                            line, self.sourceFilepath))
                    # NOTE(review): 'halt' is not defined in this block --
                    #   presumably a deliberate crash (NameError); confirm.
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt
                    continue

            # Process header stuff
            if vplType == 3 and line[0] == '#':
                for prefix, settingsKey in headerFields:
                    if line.startswith(prefix):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available':
                            settingsDict[settingsKey] = string
                        break
                else:
                    logging.warning("VPLBible.load {} is skipping unknown line: {}".format(
                        vplType, line))
                continue  # Header/comment lines carry no verse data

            # Process the main segment
            pendingPsalmTitle = None
            if vplType == 1:
                bits = line.split(' ', 2)
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split(':')
                else:
                    print("Unexpected number of bits", self.givenName, BBB, bookCode,
                          chapterNumberString, verseNumberString, len(bits), bits)
                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    assert 2 <= len(bookCode) <= 4
                    assert chapterNumberString.isdigit()
                if not verseNumberString.isdigit():
                    logging.error("Invalid verse number field at {}/{} {}:{!r}".format(
                        bookCode, BBB, chapterNumberString, verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert verseNumberString.isdigit()
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCode != lastBookCode:  # We've started a new book
                    # Use the fixed mapping if there is one, else try to guess
                    BBB = (specialBookCodes.get(bookCode)
                           or BOS66.getBBBFromText(bookCode)
                           or BOS81.getBBBFromText(bookCode)
                           or BOSx.getBBBFromText(bookCode))

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons» are for the Titles in the Book of Psalms.
                vText = vText.replace('[', '\\add ').replace(']', '\\add*') \
                             .replace('<', '\\wj ').replace('>', '\\wj*')
                if vText and vText[0] == '«':
                    if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                        # Hold the title until the book and chapter lines
                        #   exist -- adding it here put it into the previous
                        #   chapter (or even the previous book).
                        pendingPsalmTitle, sep, remainder = vText[1:].partition('»')
                        vText = remainder.lstrip()

                # Handle the verse info
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') \
                        and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2, 3):
                bits = line.split('\t', 1)
                # The reference is BBCCCVVV: 2-digit book, 3-digit chapter, then verse
                bookNumberString = bits[0][:2]
                chapterNumberString = stripLeadingZeroes(bits[0][2:5])
                verseNumberString = stripLeadingZeroes(bits[0][5:])
                bookCode = int(bookNumberString)
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                # Close up spaces around punctuation
                vText = bits[1].replace(' ,', ',').replace(' .', '.').replace(' ;', ';').replace(' :', ':') \
                               .replace(' !', '!').replace(' )', ')').replace(' ]', ']').replace(' ”', '”') \
                               .replace('“ ', '“').replace('( ', '(').replace('[ ', '[')

                if bookCode != lastBookCode:  # We've started a new book
                    if 1 <= bookCode <= 66:
                        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(bookCode)
                    else:
                        # Unknown numbers give None (reported below) rather
                        #   than raising KeyError.
                        BBB = bnDict.get(bookCode)

            else:
                # (Commented-out code for a type 4, "Forge for SwordSearcher",
                #   used to sit here; no such type is ever detected above.)
                logging.critical('Unknown VPL type {}'.format(vplType))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    halt

            if bookCode:
                if bookCode != lastBookCode:  # We've started a new book
                    if thisBook is not None:  # Better save the last book
                        self.stashBook(thisBook)
                        thisBook = None  # so that it can't be stashed again
                    if BBB:
                        if BBB in self:
                            # (The two arguments used to be the wrong way around)
                            logging.critical("Have duplicated {} book in {}".format(
                                BBB, self.givenName))
                        if BibleOrgSysGlobals.debugFlag:
                            assert BBB not in self
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'VPL Bible Book object'
                        thisBook.objectTypeString = 'VPL'
                        numChapters = len(BOSx.getNumVersesList(BBB))
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical("VPLBible{} could not figure out {!r} book code".format(
                            vplType, bookCode))
                        if BibleOrgSysGlobals.debugFlag:
                            halt

                if BBB and thisBook is not None:
                    if chapterNumber != lastChapterNumber:  # We've started a new chapter
                        if BibleOrgSysGlobals.debugFlag:
                            # Esther Greek might be an exception
                            assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                        if chapterNumber == 0:
                            logging.info("Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                        elif chapterNumber > numChapters:
                            logging.error(
                                "Have high chapter number in {} {} {} {}:{} (expected max of {})".format(
                                    self.givenName, BBB, bookCode, chapterNumberString,
                                    verseNumberString, numChapters))
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumber = chapterNumber
                        lastVerseNumber = -1

                    if pendingPsalmTitle is not None:
                        thisBook.addLine('d', pendingPsalmTitle)  # Psalm title

                    # Handle the verse info
                    if verseNumber == lastVerseNumber and vText == lastVText:
                        logging.warning(_("Ignored duplicate verse line in {} {} {} {}:{}").format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                        continue
                    # Only exact duplicates are dropped above -- the cases
                    #   below are logged but the verse is still added.
                    if verseNumber < lastVerseNumber:
                        logging.warning(
                            _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                                lastVerseNumber, verseNumber, self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    elif verseNumber == lastVerseNumber:
                        # The text must differ here (identical text was handled above)
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))

                    # Check for paragraph markers
                    if vText and vText[0] == '¶':
                        thisBook.addLine('p', '')
                        vText = vText[1:].lstrip()

                    thisBook.addLine('v', verseNumberString + ' ' + vText)
                    lastVText = vText
                    lastVerseNumber = verseNumber

            else:  # No bookCode yet
                logging.warning("VPLBible.load{} is skipping unknown pre-book line: {}".format(
                    vplType, line))

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load( self ):
    """
    Load the compressed EasyWorship data file and import the book elements.

    The whole file is read into memory and then walked sequentially:
        a 32-byte block which must start with 'EasyWorship Bible Text'
        a 56-byte block which starts with the module name
        66 book records of 224 bytes each (51 + 157 + 8 + 8) holding the
            book abbreviation, the chapter count and verse counts, and the
            offset and length of the zlib-compressed text of the book
        a length-prefixed compressed string which (if present) replaces the module name
        the compressed book texts (located by the offsets above) and 16 trailing bytes.

    Each decompressed book consists of CRLF-separated 'C:V text' lines
        which are added to a new BibleBook as c and v lines and then stashed.
    A book with a zero length is reported and skipped.

    Returns a dict of the raw blocks that were read or stepped over, using the keys
        'block1', 'block2', 'block3', 'block3n' (only if present), 'block4', 'block5', 'block9'.

    Raises AssertionError if the file doesn't have the expected layout.
    """
    import zlib

    def readString( data, startIndex, maxLength=32 ):
        """
        Return the text found in data at startIndex, stopping at the first
            control byte (less than 0x20) or after maxLength bytes.
        Each byte is converted to one character with chr().
        """
        characters = []
        for byteValue in data[startIndex:startIndex+maxLength]:
            if byteValue < 0x20: break
            characters.append( chr( byteValue ) )
        return ''.join( characters )

    if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
    with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done
        fileBytes = myFile.read()
    if BibleOrgSysGlobals.debugFlag: print( " {:,} bytes read".format( len(fileBytes) ) )

    keep = {}
    index = 0

    # Block 1 (32 bytes): the format signature followed by some binary which is only kept, not interpreted
    keep['block1'] = fileBytes[index:index+32]
    hString = readString( fileBytes, index )
    if BibleOrgSysGlobals.debugFlag: print( 'block1b', hexlify( fileBytes[index+len(hString):index+32] ) )
    index += 32
    if BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
    assert hString == 'EasyWorship Bible Text'

    # Block 2 (56 bytes): the module name -- the remainder of the block is stepped over
    keep['block2'] = fileBytes[index:index+56]
    nString = readString( fileBytes, index )
    index += 56
    if BibleOrgSysGlobals.debugFlag: print( 'nString', repr(nString), index )
    self.name = nString

    # The 66 book records -- the compressed text of each book is fetched by looking ahead in the file
    rawBooks = []
    for bookNumber in range( 1, 66+1 ):
        bookAbbrev = readString( fileBytes, index )
        index += 51
        if bookAbbrev.endswith( '.' ): bookAbbrev = bookAbbrev[:-1] # Remove final period
        if BibleOrgSysGlobals.verbosityLevel > 2: print( 'bookAbbrev', repr(bookAbbrev) )

        numChapters = fileBytes[index] # One byte, followed by one byte per chapter
        numVerses = list( fileBytes[index+1:index+1+numChapters] )
        index += 157
        if BibleOrgSysGlobals.debugFlag: print( ' ', numChapters, numVerses )

        # Only the first four bytes of each of the next two eight-byte fields are used
        bookStart, = struct.unpack_from( "<I", fileBytes, index )
        index += 8
        if BibleOrgSysGlobals.debugFlag: print( ' bookStart', bookStart )
        bookLength, = struct.unpack_from( "<I", fileBytes, index )
        index += 8
        if BibleOrgSysGlobals.debugFlag: print( ' bookLength', bookLength, bookStart+bookLength )

        bookBytes = fileBytes[bookStart:bookStart+bookLength]
        if bookLength == 0: # Report it rather than failing with an IndexError on the header check
            logging.critical( "Booknumber {} is empty in {}".format( bookNumber, self.abbreviation ) )
        else:
            assert bookBytes[:2] == b'\x78\xda' # Zlib compression header
        rawBooks.append( (bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes) )

    # Block 3: a four-byte length followed by a compressed string
    if BibleOrgSysGlobals.debugFlag: print( 'unknown block3', index, hexlify( fileBytes[index:index+30] ) )
    keep['block3'] = fileBytes[index:index+30]
    length3, = struct.unpack_from( "<I", fileBytes, index )
    if length3:
        block3 = fileBytes[index+4:index+length3] # length3 includes the four length bytes themselves
        byteResult = zlib.decompress( block3 )
        textResult = byteResult.decode( 'utf8' )
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( "Got", len(textResult), textResult, 'from', length3 )
        keep['block3n'] = textResult
        if self.name: print( 'Overwriting module name {!r} with {!r}'.format( self.name, textResult ) )
        self.name = textResult
        index += length3
    if BibleOrgSysGlobals.debugFlag: print( 'end of contents', index, hexlify( fileBytes[index:index+60] ) )

    # Whatever lies between here and the first book is kept as block5
    firstBookStart = rawBooks[0][3]
    keep['block4'] = firstBookStart
    block5 = fileBytes[index:firstBookStart]
    keep['block5'] = block5
    index += len( block5 )
    assert index == firstBookStart # Should now be at the start of the first book (already fetched above)
    assert len(rawBooks) == 66

    # Look at extra stuff after the end of the final book
    finalBookStart, finalBookLength = rawBooks[-1][3], rawBooks[-1][4]
    endBytes = fileBytes[finalBookStart+finalBookLength:]
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
    assert len(endBytes) == 16
    keep['block9'] = endBytes

    del fileBytes # Everything that's needed has now been copied out

    # Books (by module abbreviation) which are known to contain out-of-order verses
    outOfOrderVerseBooks = {
        'rsv': ('EXO','HAG',),
        'gnt': ('ISA','ZEC','MRK',),
        'hcsb': ('SA2',),
        'msg': ('NUM','JDG','SA2','CH2','EZE','ACT',),
        }
    knownOutOfOrderBooks = outOfOrderVerseBooks.get( self.abbreviation, () )

    # Now we have to decode the book text (compressed about 4x with zlib)
    for BBB, rawBook in zip( BOS.getBookList(), rawBooks ):
        bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBook
        if not bookBytes: continue # Empty book -- already reported above
        if BibleOrgSysGlobals.verbosityLevel > 2: print( ' Decoding {}…'.format( BBB ) )
        byteResult = zlib.decompress( bookBytes )
        textResult = byteResult.decode( 'utf8' )
        if '\t' in textResult:
            logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
            textResult = textResult.replace( '\t', ' ' )
        # NOTE: This checks for doubled spaces (every verse line needs a single space after its C:V reference)
        if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'EasyWorship Bible Book object'
        thisBook.objectTypeString = 'EasyWorship Bible'
        if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

        C = V = '0'
        for line in textResult.split( '\r\n' ):
            if not line: continue # skip blank lines
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
            assert line[0].isdigit()
            assert ':' in line[:4]
            CV, verseText = line.split( ' ', 1 )
            newC, newV = CV.split( ':' )

            if newC != C: # We've started a new chapter
                if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order
                    print( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                else: assert int(newC) > int(C)
                C, V = newC, '0'
                thisBook.addLine( 'c', C )

            if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                if int(newV) < int(V): break # Ignore the remainder of this book
            elif BBB in knownOutOfOrderBooks:
                print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
            else:
                try: assert int(newV) > int(V)
                except ValueError: # The verse "number" wasn't a simple integer
                    if BibleOrgSysGlobals.debugFlag:
                        print( "Something's not an integer around {} {}:{} {}".format( BBB, C, V, verseText ) )
            V = newV
            thisBook.addLine( 'v', V + ' ' + verseText )

        if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
        self.stashBook( thisBook )

    self.doPostLoadProcessing()
    return keep
def load( self ):
    """
    Load a single verse-per-line (VPL) source file and load the book elements.

    Blank lines and lines starting with '#' are ignored.
    Every other line is expected to contain three space-separated fields:
        bookCode chapter:verse verseText
    A line that doesn't have that form is reported and skipped,
        as is a line whose chapter or verse number is not numeric
        or whose book code cannot be converted to a BBB book code.

    Formatting characters in the verse text are converted to USFM character markers:
        [brackets] become add fields
        <brackets> become wj fields
        «brackets» at the start of verse 1 of a Psalm become a d (title) line.

    Each completed book is given to self.saveBook()
        and self.doPostLoadProcessing() is called at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    lineCount = 0
    BBB = thisBook = None
    # Preset these so that the diagnostics for a bad first line have something to display
    bookCode = chapterNumberString = verseNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                logging.info( " VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                line = line[1:] # Remove the Byte Order Marker
            if line.endswith( '\n' ): line = line[:-1] # Remove the trailing newline character
            if not line: continue # Just discard blank lines
            if line[0]=='#': continue # Just discard comment lines

            bits = line.split( ' ', 2 )
            if len(bits) != 3 or ':' not in bits[1]:
                # Skip the line -- otherwise the fields from the previous line would be processed again
                print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                continue
            bookCode, CVString, vText = bits
            # Any second colon ends up in the verse field and is rejected by the isdigit() check below
            chapterNumberString, colon, verseNumberString = CVString.partition( ':' )

            if not bookCode and not chapterNumberString and not verseNumberString:
                print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if BibleOrgSysGlobals.debugFlag: assert 2 <= len(bookCode) <= 4
            if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
            if not chapterNumberString.isdigit(): # Don't let int() below abort the entire load
                logging.error( "Invalid chapter number field at {}/{} {!r}:{}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                continue
            if not verseNumberString.isdigit():
                logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                continue
            chapterNumber = int( chapterNumberString )
            verseNumber = int( verseNumberString )

            if bookCode != lastBookCode: # We've started a new book
                newBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB( bookCode ) # Try to guess
                if not newBBB:
                    logging.critical( "VPLBible could not figure out {!r} book code".format( bookCode ) )
                    # NOTE(review): 'halt' isn't defined in this function -- presumably a deliberate NameError to stop when debugging
                    if BibleOrgSysGlobals.debugFlag: halt
                    continue # Don't add verses from an unknown book to the previous book
                if thisBook is not None: self.saveBook( thisBook ) # Better save the last book
                BBB = newBBB
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = "VPL Bible Book object"
                thisBook.objectTypeString = "VPL"
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle special formatting
            #   [brackets] are for Italicized words
            #   <brackets> are for the Words of Christ in Red
            #   «brackets» are for the Titles in the Book of Psalms.
            vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
            if vText and vText[0]=='«':
                if BBB=='PSA' and verseNumberString=='1': # Psalm title
                    # partition copes with a missing closing mark (the whole text then becomes the title)
                    psalmTitle, closingMark, remainder = vText[1:].partition( '»' )
                    thisBook.addLine( 'd', psalmTitle )
                    vText = remainder.lstrip()

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            # NOTE(review): every '<' was already replaced above so this test can't currently succeed -- confirm what was intended
            if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                else:
                    logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            # The verse is added even after one of the above warnings
            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there won't be one if no verse lines were found)
    if thisBook is not None: self.saveBook( thisBook )
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single verse-per-line (VPL) source file and load the book elements.

    Blank lines and lines starting with '#' are ignored.
    Every other line is expected to contain three space-separated fields:
        bookCode chapter:verse verseText
    A line that doesn't have that form is reported and skipped,
        as is a line whose chapter or verse number is not numeric
        or whose book code cannot be converted to a BBB book code.

    Formatting characters in the verse text are converted to USFM character markers:
        [brackets] become add fields
        <brackets> become wj fields
        «brackets» at the start of verse 1 of a Psalm become a d (title) line.

    Each completed book is given to self.saveBook()
        and self.doPostLoadProcessing() is called at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    lineCount = 0
    BBB = thisBook = None
    # Preset these so that the diagnostics for a bad first line have
    # something to display
    bookCode = chapterNumberString = verseNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if (lineCount == 1 and self.encoding.lower() == 'utf-8'
                    and line[0] == chr(65279)):  # U+FEFF
                logging.info(
                    " VPLBible.load: Detected Unicode Byte Order Marker (BOM)"
                )
                line = line[1:]  # Remove the Byte Order Marker
            if line.endswith('\n'):
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == '#':
                continue  # Just discard comment lines

            bits = line.split(' ', 2)
            if len(bits) != 3 or ':' not in bits[1]:
                # Skip the line -- otherwise the fields from the previous
                # line would be processed again
                print("Unexpected number of bits", self.givenName, BBB,
                      bookCode, chapterNumberString, verseNumberString,
                      len(bits), bits)
                continue
            bookCode, CVString, vText = bits
            # Any second colon ends up in the verse field and is rejected
            # by the isdigit() check below
            chapterNumberString, colon, verseNumberString = \
                CVString.partition(':')

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode, chapterNumberString,
                    verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert 2 <= len(bookCode) <= 4
            if BibleOrgSysGlobals.debugFlag:
                assert chapterNumberString.isdigit()
            if not chapterNumberString.isdigit():
                # Don't let int() below abort the entire load
                logging.error(
                    "Invalid chapter number field at {}/{} {!r}:{}".format(
                        bookCode, BBB, chapterNumberString,
                        verseNumberString))
                continue
            if not verseNumberString.isdigit():
                logging.error(
                    "Invalid verse number field at {}/{} {}:{!r}".format(
                        bookCode, BBB, chapterNumberString,
                        verseNumberString))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    assert verseNumberString.isdigit()
                continue
            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if bookCode != lastBookCode:  # We've started a new book
                newBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB(
                    bookCode)  # Try to guess
                if not newBBB:
                    logging.critical(
                        "VPLBible could not figure out {!r} book code".
                        format(bookCode))
                    # NOTE(review): 'halt' isn't defined in this function --
                    # presumably a deliberate NameError to stop when debugging
                    if BibleOrgSysGlobals.debugFlag:
                        halt
                    # Don't add verses from an unknown book to the previous book
                    continue
                if thisBook is not None:  # Better save the last book
                    self.saveBook(thisBook)
                BBB = newBBB
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "VPL Bible Book object"
                thisBook.objectTypeString = "VPL"
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle special formatting
            #   [brackets] are for Italicized words
            #   <brackets> are for the Words of Christ in Red
            #   «brackets» are for the Titles in the Book of Psalms.
            vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
            if vText and vText[0] == '«':
                if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                    # partition copes with a missing closing mark (the whole
                    # text then becomes the title)
                    psalmTitle, closingMark, remainder = \
                        vText[1:].partition('»')
                    thisBook.addLine('d', psalmTitle)
                    vText = remainder.lstrip()

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").
                    format(self.givenName, BBB, bookCode,
                           chapterNumberString, verseNumberString))
                continue
            # NOTE(review): every '<' was already replaced above so this test
            # can't currently succeed -- confirm what was intended
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                    '<') and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                      ).format(lastVerseNumber, verseNumber, self.givenName,
                               BBB, bookCode, chapterNumberString,
                               verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").
                        format(verseNumber, self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                          ).format(verseNumber, self.givenName, BBB,
                                   bookCode, chapterNumberString,
                                   verseNumberString))
            # The verse is added even after one of the above warnings
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there won't be one if no verse lines were found)
    if thisBook is not None:
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load( self ):
    """
    Load a single SwordSearcher Forge source file and load the book elements.

    The first line is expected to start with '; TITLE:' (otherwise it is skipped).
    The other lines are handled as follows:
        '; xxx' header lines are stored in the settings (if recognised) or else skipped with a warning
        '$$ {name}' lines start a metadata section --
            the following text lines are collected and stored in the settings under that name
        '$$ book chapter:verse' lines give the reference for the following text line(s)
        other lines contain verse text in which {stuff} is converted to footnotes,
            [stuff] to add fields, +r/stuff-r/ to wj fields, and <scripref> fields to cross-references.

    Each completed book is given to self.stashBook().
    Any settings which were found are saved in self.suppliedMetadata['Forge4SS'] and applied,
        and then self.doPostLoadProcessing() is called.
    """
    global BOS66, BOS81, BOSx

    def convertAll( pattern, makeReplacement, text ):
        """
        Keep replacing the first match of the regex pattern in text
            with makeReplacement( the first group ) until nothing matches.
        """
        match = re.search( pattern, text )
        while match:
            text = text[:match.start()] + makeReplacement( match.group(1) ) + text[match.end():]
            match = re.search( pattern, text )
        return text

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

    # Create the (module-level) organisational systems the first time that they're needed
    if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
    if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
    if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # (line start, index where the value starts, settings key) -- the order matters for the FOOTNOTES entries
    # NOTE(review): some of these indexes don't equal the length of the line start
    #   (e.g., 15 for the 16-character '; HAS FOOTNOTES:') -- kept exactly as they were; confirm what was intended
    headerFields = (
        ( '; TITLE:', 8, 'TITLE' ),
        ( '; ABBREVIATION:', 15, 'ABBREVIATION' ),
        ( '; HAS ITALICS', 14, 'HAS_ITALICS' ),
        ( '; HAS FOOTNOTES:', 15, 'HAS_FOOTNOTES' ),
        ( '; HAS FOOTNOTES', 14, 'HAS_FOOTNOTES' ),
        ( '; HAS REDLETTER', 14, 'HAS_REDLETTER' ),
        )
    # Book names which have the space after their leading digit removed before the reference is split
    numberedBookStarts = ( '1 K', '2 K', '1 Chr', '2 Chr', '1 Cor', '2 Cor', '1 Thess', '2 Thess',
                            '1 Tim', '2 Tim', '1 Pet', '2 Pet', '1 J', '2 J', '3 J' )

    lineCount = 0
    bookCode = BBB = metadataName = None
    metadataContents = ''
    # Preset these so that the diagnostics for a bad first reference have something to display
    chapterNumberString = verseNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line.endswith( '\n' ): line = line[:-1] # Remove the trailing newline character
            if not line: continue # Just discard blank lines

            if lineCount==1:
                if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                    logging.info( " ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)" )
                    line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                match = re.search( '^; TITLE:\\s', line )
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print( "First line got type {!r} match from {!r}".format( match.group(0), line ) )
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print( "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                    # NOTE(review): 'halt' isn't defined in this function -- presumably a deliberate NameError to stop when debugging
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue

            # Process header stuff
            headerField = next( (field for field in headerFields if line.startswith( field[0] )), None )
            if headerField is not None:
                lineStart, valueIndex, settingName = headerField
                string = line[valueIndex:].strip()
                if string: settingsDict[settingName] = string
                continue
            if line[0]==';':
                logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}".format( line ) )
                continue # Just discard comment lines

            # Process the main segment
            if line.startswith( '$$ ' ):
                if metadataName and metadataContents: settingsDict[metadataName] = metadataContents
                metadataName = None # Any '$$' line ends a metadata section (even an empty one)
                pointer = line[3:]
                if pointer and pointer[0]=='{' and pointer[-1]=='}':
                    metadataName = pointer[1:-1]
                    if metadataName: metadataContents = ''
                else: # let's assume it's a BCV reference
                    for numberedBookStart in numberedBookStarts:
                        pointer = pointer.replace( numberedBookStart, numberedBookStart.replace( ' ', '' ) )
                    B_CV_Bits = pointer.split( ' ', 1 )
                    if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                        bookCode, CVString = B_CV_Bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                        chapterNumber = int( chapterNumberString )
                        verseNumber = int( verseNumberString )
                        if bookCode != lastBookCode: # We've started a new book
                            if bookCode in ('Ge',): BBB = 'GEN'
                            elif bookCode in ('Le',): BBB = 'LEV'
                            elif bookCode in ('La',): BBB = 'LAM'
                            else: # Try to guess
                                BBB = BOS66.getBBBFromText( bookCode )
                                if not BBB: BBB = BOS81.getBBBFromText( bookCode )
                                if not BBB: BBB = BOSx.getBBBFromText( bookCode )
                    else: # The previous reference (if any) stays in force
                        print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(B_CV_Bits), B_CV_Bits )
                continue # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName: # then this is (more of) the contents of a metadata section
                metadataContents += ('\n' if metadataContents else '') + line
                continue
            if not bookCode: # No reference yet -- so there's no chapter or verse number to use below
                logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}".format( line ) )
                continue

            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = line.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
            vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
            # Convert {stuff} to footnotes
            vText = convertAll( '\\{(.+?)\\}',
                        lambda note: '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, note ), vText )
            # Convert [stuff] to added fields
            vText = convertAll( '\\[(.+?)\\]', lambda added: '\\add {}\\add*'.format( added ), vText )
            # Convert +r/This text is red-letter-r/ to wj fields
            vText = convertAll( '\\+r/(.+?)-r/', lambda words: '\\wj {}\\wj*'.format( words ), vText )
            # Final check for unexpected remaining formatting
            if any( badChar in vText for badChar in '{}[]/' ):
                logging.warning( "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )

            if bookCode != lastBookCode: # We've started a new book
                if thisBook is not None: # Better save the last book (but only once)
                    self.stashBook( thisBook )
                    thisBook = None
                if BBB:
                    if BBB in self:
                        logging.critical( "Have duplicated {} book in {}".format( BBB, self.givenName ) )
                    if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    numChapters = len( BOSx.getNumVersesList( BBB ) )
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical( "ForgeForSwordSearcherBible could not figure out {!r} book code".format( bookCode ) )
                    if BibleOrgSysGlobals.debugFlag: halt

            if not BBB: continue # The text of an unknown book is dropped

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif chapterNumber > numChapters:
                    logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber: # The text must differ here (identical lines were skipped above)
                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

            # Check for paragraph markers
            if vText and vText[0]=='¶':
                thisBook.addLine( 'p', '' )
                vText = vText[1:].lstrip()

            # The verse is added even after one of the above warnings
            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Don't lose a metadata section which ran right to the end of the file
    if metadataName and metadataContents: settingsDict[metadataName] = metadataContents

    # Save the final book
    if thisBook is not None: self.stashBook( thisBook )

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata( 'Forge4SS' ) # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load( self ): """ Load the compressed data file and import book objects. """ if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) ) with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done fileBytes = myFile.read() if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( " {:,} bytes read".format( len(fileBytes) ) ) keep = OrderedDict() index = 0 # Block 1 is 32-bytes long and always the same for EW2009 Bibles #if debuggingThisModule: print( 'introBlock', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] ) keep['introBlock'] = (index,fileBytes[index:index+32]) hString = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break hString += chr( char8 ) #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index ) if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag: assert hString == 'EasyWorship Bible Text' introBlockb = fileBytes[index+j:index+32] #if BibleOrgSysGlobals.debugFlag: print( 'introBlockb', hexlify( introBlockb ), introBlockb ) assert introBlockb == b'\x1a\x02<\x00\x00\x00\xe0\x00\x00\x00' # b'1a023c000000e0000000' # Skipped some (important?) binary here??? 
but it's the same for every module index += 32 # Block 2 is 56-bytes long moduleNameBlock = fileBytes[index:index+56] keep['moduleNameBlock'] = (index,moduleNameBlock) #if debuggingThisModule: print( 'moduleNameBlock', hexlify( moduleNameBlock ), moduleNameBlock ) nString = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break nString += chr( char8 ) #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: print( 'nString', repr(nString), index ) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EasyWorshipBible.load: " + _("Setting module name to {!r}").format( self.name ) ) self.name = nString #assert self.name # Not there for amp and gkm moduleNameBlockb = fileBytes[index+j:index+56] #if BibleOrgSysGlobals.debugFlag: print( 'moduleNameBlockb', len(moduleNameBlockb), hexlify( moduleNameBlockb ), moduleNameBlockb ) #assert moduleNameBlockb.endswith( b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00' ) # b'000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000' for ix in range( index+j, index+56 ): # Mostly zeroes remaining if ix == 84: # What does this mean??? value = fileBytes[ix] assert value in (0,1,2,3,4,5) # bbe=0, alb=1, esv2=2, esv=3, asv=4 nasb=5 Revision number??? 
keep['byte84'] = (index,value) else: assert fileBytes[ix] == 0 index += 56 # Get the optional booknames and the raw data for each book into a list rawBooks = [] for bookNumber in range( 1, 66+1 ): bookInfoBlock = fileBytes[index:index+51] blockName = 'bookInfoBlock-{}'.format( bookNumber ) keep[blockName] = (index,bookInfoBlock) #if debuggingThisModule: print( blockName, hexlify( bookInfoBlock ), bookInfoBlock ) bookName = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break # bookName seems quite optional -- maybe the English ones are assumed if empty??? bookName += chr( char8 ) assert fileBytes[index+j:index+51] == b'\x00' * (51-j) # Skipped some zeroes here index += 51 if bookName and bookName[-1] == '.': bookName = bookName[:-1] # Remove final period #if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2: #print( 'bookName', repr(bookName) ) numChapters = fileBytes[index] numVerses = [] for j in range( 0, numChapters ): numVerses.append( fileBytes[index+j+1] ) #print( "here1", 157-j-2, hexlify(fileBytes[index+j+2:index+157]), fileBytes[index+j+2:index+157] ) if self.abbreviation != 'fn1938': # Why does this fail??? 
assert fileBytes[index+j+2:index+157] == b'\x00' * (157-j-2) # Skipped some zeroes here index += 157 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' {!r} numChapters={} verses={}'.format( bookName, numChapters, numVerses ) ) bookStart, = struct.unpack( "<I", fileBytes[index:index+4] ) assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here index += 8 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' bookStart is at {:,}'.format( bookStart ) ) bookLength, = struct.unpack( "<I", fileBytes[index:index+4] ) assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here index += 8 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' {} bookLength is {:,} which goes to {:,}'.format( bookNumber, bookLength, bookStart+bookLength ) ) bookBytes = fileBytes[bookStart:bookStart+bookLength] # Looking ahead into the file rawBooks.append( (bookName, numChapters, numVerses, bookStart, bookLength, bookBytes) ) if bookLength == 0: # e.g., gkm Philippians (book number 50) logging.critical( "Booknumber {} is empty in {}".format( bookNumber, self.abbreviation ) ) else: #if debuggingThisModule: #print( "cHeader1 for {}: {}={} {}={}".format( self.abbreviation, bookBytes[0], hexlify(bookBytes[0:1]), bookBytes[1], hexlify(bookBytes[1:2]) ) ) assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header (for compression levels 7-9) assert index == 14872 # 32 + 56 + 224*66 workNameBlock = fileBytes[index:index+30] # 30 here is just a maximum, not fixed keep['workNameBlock'] = (index,workNameBlock) # This block starts with a length, then a work name, e.g., ezFreeASV #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: #print( 'workNameBlock', index, hexlify(workNameBlock), workNameBlock ) length3, = struct.unpack( "<I", fileBytes[index:index+4] ) #print( "length3", length3 ) # Seems to include the compressed string plus six more bytes keep['length3'] = (index,length3) if length3: bookInfoBlock 
= fileBytes[index+4:index+4+length3-4-6] if debuggingThisModule: print( "cHeader2 for {}: {}={} {}={}".format( self.abbreviation, bookInfoBlock[0], hexlify(bookInfoBlock[0:1]), bookInfoBlock[1], hexlify(bookInfoBlock[1:2]) ) ) assert bookInfoBlock[0]==0x78 and bookInfoBlock[1]==0xda # Zlib compression header (for compression levels 7-9) byteResult = zlib.decompress( bookInfoBlock ) #rewriteResult1 = zlib.compress( byteResult, 9 ) #byteResult1 = zlib.decompress( rewriteResult1 ) #compressor = zlib.compressobj(level=9, method=zlib.DEFLATED, wbits=15, memLevel=8, strategy=zlib.Z_DEFAULT_STRATEGY ) #rewriteResult2 = compressor.compress( byteResult ) #rewriteResult2 += compressor.flush() #byteResult2 = zlib.decompress( rewriteResult2 ) #print( "rewrite1 {} {} {}\n {} {} {}\n {} {} {}\n to {} {}\n to {} {}\n to {} {}" \ #.format( len(bookInfoBlock), hexlify(bookInfoBlock), bookInfoBlock, #len(rewriteResult1), hexlify(rewriteResult1), rewriteResult1, #len(rewriteResult2), hexlify(rewriteResult2), rewriteResult2, #len(byteResult), byteResult, #len(byteResult1), byteResult1, #len(byteResult2), byteResult2 ) ) textResult = byteResult.decode( 'utf8' ) if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( "Block4: Got {} chars {!r} from {} bytes".format( len(textResult), textResult, length3 ) ) assert textResult.startswith('ezFree') or textResult.startswith('ezPaid') keep['workName'] = (index+4,textResult) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EasyWorshipBible.load: " + _("Setting module work name to {!r}").format( textResult ) ) if self.name: self.workName = textResult else: # Should rarely happen self.name = self.workName = textResult workNameAppendage = fileBytes[index+4+length3-6-4:index+4+length3-4] #print( "workNameAppendage", len(workNameAppendage), hexlify(workNameAppendage), workNameAppendage ) keep['workNameAppendage'] = (index+4+length3-6-4,workNameAppendage) assert workNameAppendage[:4] == b'QK\x03\x04' uncompressedNameLength, = struct.unpack( 
"<B", workNameAppendage[4:5] ) assert workNameAppendage[5:] == b'\x00' assert len(textResult) == uncompressedNameLength keep['length3'] = (index,length3) index += length3 #print( self.abbreviation, len(textResult), repr(textResult), 'length3', length3, len(textResult)+18 ) assert length3 == len(textResult) + 18 bookDataStartIndex = rawBooks[0][3] #print( "bookDataStartIndex", bookDataStartIndex ) #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: #print( 'After known contents @ {:,}'.format( index ), hexlify( fileBytes[index:index+60] ), fileBytes[index:index+60] ) block0080 = fileBytes[index:bookDataStartIndex] #print( "block0080", index, len(block0080), hexlify(block0080), block0080 ) keep['block0080'] = (index,block0080) assert block0080 == b'\x00\x00\x08\x00' # b'00000800' index += len( block0080 ) keep['bookDataStartIndex'] = (index,bookDataStartIndex) assert index == bookDataStartIndex # Should now be at the start of the first book (already fetched above) # Look at extra stuff right at the end of the file assert len(rawBooks) == 66 index = bookStart + bookLength # of the last book endBytes = fileBytes[index:] #if BibleOrgSysGlobals.debugFlag and debuggingThisModule: #print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes ) assert len(endBytes) == 16 keep['endBytes'] = (index,endBytes) assert endBytes == b'\x18:\x00\x00\x00\x00\x00\x00ezwBible' # b'183a000000000000657a774269626c65' del fileBytes # Not needed any more # Now we have to decode the book text (compressed about 4x with zlib) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EWB loading books for {}…".format( self.abbreviation ) ) for j, BBB in enumerate( BOS.getBookList() ): bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j] if bookLength == 0: assert not bookBytes logging.critical( " Skipped empty {}".format( BBB ) ) continue if BibleOrgSysGlobals.verbosityLevel > 2: print( ' Decoding {}…'.format( BBB ) ) bookBytes, bookExtra = bookBytes[:-10], 
bookBytes[-10:] assert len(bookExtra) == 10 keep['bookExtra-{}'.format(j+1)] = (-10,bookExtra) assert bookExtra[:4] == b'QK\x03\x04' uncompressedBookLength, = struct.unpack( "<I", bookExtra[4:8] ) assert bookExtra[8:] == b'\x08\x00' byteResult = zlib.decompress( bookBytes ) assert len(byteResult) == uncompressedBookLength try: textResult = byteResult.decode( 'utf8' ) except UnicodeDecodeError: logging.critical( "Unable to decode {} {} bookText -- maybe it's not utf-8???".format( self.abbreviation, BBB ) ) continue if debuggingThisModule: rewriteResult1 = zlib.compress( byteResult, 9 ) byteResult1 = zlib.decompress( rewriteResult1 ) if rewriteResult1 != bookBytes: print( "\nbookBytes", len(bookBytes), hexlify(bookBytes) ) print( "\nrewriteResult1", len(rewriteResult1), hexlify(rewriteResult1) ) halt if byteResult1 != byteResult: print( len(byteResult), hexlify(byteResult) ) print( len(byteResult1), hexlify(byteResult1) ) halt if '\t' in textResult: logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) ) textResult = textResult.replace( '\t', ' ' ) #print( textResult ) if BibleOrgSysGlobals.strictCheckingFlag: assert ' ' not in textResult thisBook = BibleBook( self, BBB ) thisBook.objectNameString = 'EasyWorship Bible Book object' thisBook.objectTypeString = 'EasyWorship Bible' if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev ) C, V = '-1', '-1' # So first/id line starts at -1:0 for line in textResult.split( '\r\n' ): if not line: continue # skip blank lines #if BibleOrgSysGlobals.debugFlag and debuggingThisModule: #print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) ) assert line[0].isdigit() assert ':' in line[:4] CV,verseText = line.split( ' ', 1 ) newC,newV = CV.split( ':' ) #print( newC, V, repr(verseText) ) if newC != C: if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order chapters in {}!".format( BBB ) ) 
else: assert int(newC) > int(C) C, V = newC, '0' thisBook.addLine( 'c', C ) if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats if int(newV) < int(V): break elif self.abbreviation=='drv' and BBB in ('GEN','EXO','NUM',): # Handle a bug -- Gen 18:1&12, Exo 28:42&43 out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) else: try: assert int(newV) > int(V) except ValueError: logging.critical( "Something's not an integer around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) ) except AssertionError: logging.critical( "Something's out of order around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) ) V = newV thisBook.addLine( 'v', V + ' ' + verseText ) if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB ) self.stashBook( thisBook ) self.doPostLoadProcessing() return keep
def load( self ):
    """
    Load a single e-Sword (SQLite3) Bible module and load its book elements.

    The module settings are copied from the first row of the 'Details'
    table into self.settingsDict.  The 'Bible' table is then sanity-checked
    (first book number and total row count) against what the OT/NT settings
    lead us to expect.  Finally every expected verse is requested
    individually, in GENERIC-KJV-66-ENG book/chapter/verse order, and each
    non-blank verse is passed to self.handleLine().  Books which yielded at
    least one non-blank verse are saved with self.saveBook().

    Problems noticed along the way are logged and are also collected into
    self.errorDictionary['Load Errors'].

    The database connection is always closed again, even if loading fails.
    """
    if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
    loadErrors = []

    # A wrong-looking filename is only reported -- we still attempt the load
    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
    elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
        logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

    connection = sqlite3.connect( self.sourceFilepath )
    try:
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

        # Just get some information from the file
        cursor.execute( 'select * from Bible' )
        rows = cursor.fetchall()
        numRows = len(rows)
        if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
        BBBn1 = rows[0][0] # First column of the first row is used as the book number
        if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
        del rows # Can be large, and only the count and the first book number are needed
        BBB1 = None
        if BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )

        # Work out which books and how many verse rows we expect
        testament = BBB = None
        booksExpected = textLineCountExpected = 0
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957
        elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
            errorMessage = "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict )
            logging.critical( errorMessage )
            loadErrors.append( errorMessage )
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['Apocrypha']: # incomplete
            testament, BBB = 'AP', 'XXX'
            booksExpected, textLineCountExpected = 99, 999999
            # NOTE(review): 'halt' is not defined in the visible code, so this line raises NameError
            #   -- apparently a deliberate stop because Apocrypha-only modules are not handled yet
            halt
        if not BBB:
            errorMessage = "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict )
            logging.critical( errorMessage )
            loadErrors.append( errorMessage )
        if Globals.debugFlag or Globals.verbosityLevel>2:
            print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
        if BBB1 != BBB:
            errorMessage = "First book seems wrong: {} instead of {}".format( BBB1, BBB )
            logging.critical( errorMessage )
            loadErrors.append( errorMessage )
            if not BBB: BBB = BBB1 # Fall back to whatever the data itself starts with
        if numRows != textLineCountExpected:
            errorMessage = "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected )
            logging.critical( errorMessage )
            loadErrors.append( errorMessage )

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "e-Sword Bible Book object"
        thisBook.objectTypeString = "e-Sword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute( 'select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
            except sqlite3.Error: # Couldn't fetch this row -- treat it like a missing reference
                row = None
            line = None if row is None else row[0] # fetchone() gives None if this reference is missing

            if line is None:
                logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( line, str ): # We can't do anything with non-text data so give up
                if 'encryption' in self.settingsDict:
                    logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                else:
                    logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                break
            elif not line:
                logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else:
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (We just ignore these for now)
                if '\r' in line or '\n' in line:
                    if Globals.debugFlag:
                        logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.rstrip( '\r\n' ) # Remove CR/LFs from the end
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

                self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

            # Step on to the next expected verse reference
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )

                    # Create the next book
                    thisBook = BibleBook( self.name, BBB )
                    thisBook.objectNameString = "e-Sword Bible Book object"
                    thisBook.objectTypeString = "e-Sword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                else: # next chapter only
                    numV = verseList[C-1]
                    V = 1

                if ourGlobals['haveParagraph']:
                    thisBook.appendLine( 'p', '' )
                    ourGlobals['haveParagraph'] = False

        if Globals.strictCheckingFlag or Globals.debugFlag: self.checkForExtraMaterial( cursor, BOS )
        cursor.close()
    finally:
        connection.close() # Don't leak the database connection, whatever happened above

    if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single DrupalBible source file and load book elements.

    The file is read line by line.  Blank lines and lines starting with '#'
    are discarded.  The first remaining line is expected to be '*Bible';
    the lines which follow are handled according to the current section:
        status 0 (until '*Chapter'): shortName|fullName|language
        status 1 (until '*Context'): bookCode|bookFullName|bookShortName|numChapters
        status 2: bookCode|chapter|verse|lineMark|verseText

    In verse text, '<' and '>' are converted to '\\it ' and '\\it*'.
    Each book is stashed with self.stashBook() once the next book starts
    (and the final book once the file ends).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    status = 0  # 0 = getting version details, 1 = getting chapters, 2 = getting verse data
    lineCount = 0
    BBB = lastBBB = None
    thisBook = None  # Stays None if the file contains no verse lines at all
    bookDetails = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount == 1:
                if line[0] == chr(65279):  # U+FEFF
                    logging.info(
                        "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[1:]  # Remove the UTF-16 Unicode Byte Order Marker (BOM)
                elif line[:3] == '\xef\xbb\xbf':  # 0xEF,0xBB,0xBF seen through a single-byte encoding
                    logging.info(
                        "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[3:]  # Remove the UTF-8 Unicode Byte Order Marker (BOM)
            if line.endswith('\n'):
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == '#':
                continue  # Just discard comment lines

            if lineCount == 1:
                if line != '*Bible':
                    logging.warning(
                        "Unknown DrupalBible first line: {}".format(repr(line)))

            elif status == 0:
                if line == '*Chapter':
                    status = 1
                else:  # Get the version name details
                    bits = line.split('|')
                    shortName, fullName, language = bits
                    self.name = fullName

            elif status == 1:
                if line == '*Context':
                    status = 2
                else:  # Get the book name details
                    bits = line.split('|')
                    bookCode, bookFullName, bookShortName, numChapters = bits
                    assert bookShortName == bookCode
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                    # Result can be string or list of strings (best guess first)
                    BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]
                    bookDetails[BBB] = bookFullName, bookShortName, numChapters

            elif status == 2:  # Get the verse text
                bits = line.split('|')
                bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                if lineMark:
                    print(repr(lineMark))
                    # NOTE(review): 'halt' is not defined in the visible code, so this raises
                    #   NameError -- apparently a deliberate stop on an unexpected line mark
                    halt
                BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                # Result can be string or list of strings (best guess first)
                BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]
                if BBB != lastBBB:  # We've started a new book
                    if lastBBB is not None:
                        self.stashBook(thisBook)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'DrupalBible Bible Book object'
                    thisBook.objectTypeString = 'DrupalBible'
                    lastChapterNumberString = None
                    lastBBB = BBB
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                verseText = verseText.replace('<', '\\it ').replace('>', '\\it*')
                thisBook.addLine('v', verseNumberString + ' ' + verseText)

            else:
                halt  # NOTE(review): undefined name -- raises NameError for an unexpected status

    # Save the final book (if we actually found any verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.doPostLoadProcessing()
def load( self ):
    """
    Load a single CSV source file and load book elements.

    Each data line is split on the first three commas into
        bookNumber, chapterNumber, verseNumber, verseText
    An optional first line starting with "Book", (quoted) or Book, (unquoted)
    is discarded as a header; if the header is quoted, one pair of matching
    quote marks is removed from each field of every data line.  Blank lines,
    single-space lines, and lines starting with '#' are discarded.
    Lines that do not have four fields are logged and skipped.

    Some RTF codes in the verse text (quote marks, dashes, \\'xx hex
    characters, and \\uNNN? characters) are converted to the actual characters.

    Each book is saved with self.saveBook() once the next book starts
    (and the final book once the file ends).
    """
    def unquote( field ):
        """ Remove one pair of matching single or double quote marks (if any). """
        if len(field)>=2 and field[0]==field[-1] and field[0] in '"\'':
            return field[1:-1]
        return field

    if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    lineCount = 0
    BBB = None
    thisBook = None # Stays None if the file contains no data lines at all
    lastBookNumber = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    quoted = None # Set from the header line (if any)
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines
            if line==' ': continue # Handle special case which has blanks on every second line -- HACK
            if line[0]=='#': continue # Just discard comment lines
            if lineCount==1:
                if line.startswith( '"Book",' ):
                    quoted = True
                    continue # Just discard header line
                elif line.startswith( 'Book,' ):
                    quoted = False
                    continue # Just discard header line

            bits = line.split( ',', 3 ) # The verse text itself may contain commas
            if len(bits) != 4:
                # Don't fall through -- the field variables would be unset or left over from the previous line
                logging.error( "Unexpected number of bits {} {} {} {} on line {}".format( self.givenName, BBB, len(bits), bits, lineCount ) )
                continue
            bString, chapterNumberString, verseNumberString, vText = bits

            # Remove quote marks from these strings
            if quoted:
                bString = unquote( bString )
                chapterNumberString = unquote( chapterNumberString )
                verseNumberString = unquote( verseNumberString )
                vText = unquote( vText )

            bookNumber = int( bString )
            chapterNumber = int( chapterNumberString )
            verseNumber = int( verseNumberString )

            if bookNumber != lastBookNumber: # We've started a new book
                if lastBookNumber != -1: # Better save the last book
                    self.saveBook( thisBook )
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber ) # Try to guess
                assert( BBB )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = "CSV Bible Book object"
                thisBook.objectTypeString = "CSV"
                lastBookNumber = bookNumber
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Now we have to convert any possible RTF codes to our internal codes
            # First do special characters
            vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
            vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
            # Now do Unicode characters
            while True: # Find patterns like \\'d3
                match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                if not match: break
                i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                vText = vText[:match.start()] + chr( i ) + vText[match.end():]
            while True: # Find patterns like \\u253?
                match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                if not match: break
                i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                vText = vText[:match.start()] + chr( i ) + vText[match.end():]

            # Handle the verse info
            #   (These messages used to reference an undefined bookCode variable -- we show the book number instead)
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                continue
            if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                else:
                    logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )

            # NOTE: Despite the wording of the warnings above, the verse line is still added
            thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (if we actually found any data lines)
    if thisBook is not None:
        self.saveBook( thisBook )
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single CSV source file and load book elements.

    Each data line is split on the first three commas into
        bookNumber, chapterNumber, verseNumber, verseText
    An optional first line starting with "Book", (quoted) or Book, (unquoted)
    is discarded as a header; if the header is quoted, one pair of matching
    quote marks is removed from each field of every data line.  Blank lines,
    single-space lines, and lines starting with '#' are discarded.
    Lines that do not have four fields are logged and skipped.

    Some RTF codes in the verse text (quote marks, dashes, \\'xx hex
    characters, and \\uNNN? characters) are converted to the actual characters.

    Each book is saved with self.saveBook() once the next book starts
    (and the final book once the file ends).
    """

    def unquote(field):
        """ Remove one pair of matching single or double quote marks (if any). """
        if len(field) >= 2 and field[0] == field[-1] and field[0] in '"\'':
            return field[1:-1]
        return field

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    lineCount = 0
    BBB = None
    thisBook = None  # Stays None if the file contains no data lines at all
    lastBookNumber = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    quoted = None  # Set from the header line (if any)
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line == ' ':
                continue  # Handle special case which has blanks on every second line -- HACK
            if line[0] == '#':
                continue  # Just discard comment lines
            if lineCount == 1:
                if line.startswith('"Book",'):
                    quoted = True
                    continue  # Just discard header line
                elif line.startswith('Book,'):
                    quoted = False
                    continue  # Just discard header line

            bits = line.split(',', 3)  # The verse text itself may contain commas
            if len(bits) != 4:
                # Don't fall through -- the field variables would be unset or left over from the previous line
                logging.error(
                    "Unexpected number of bits {} {} {} {} on line {}".format(
                        self.givenName, BBB, len(bits), bits, lineCount))
                continue
            bString, chapterNumberString, verseNumberString, vText = bits

            # Remove quote marks from these strings
            if quoted:
                bString = unquote(bString)
                chapterNumberString = unquote(chapterNumberString)
                verseNumberString = unquote(verseNumberString)
                vText = unquote(vText)

            bookNumber = int(bString)
            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if bookNumber != lastBookNumber:  # We've started a new book
                if lastBookNumber != -1:  # Better save the last book
                    self.saveBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)  # Try to guess
                assert (BBB)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "CSV Bible Book object"
                thisBook.objectTypeString = "CSV"
                lastBookNumber = bookNumber
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert (chapterNumber > lastChapterNumber
                            or BBB == 'ESG')  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookNumber,
                            chapterNumberString, verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Now we have to convert any possible RTF codes to our internal codes
            # First do special characters
            vText = vText.replace('\\ldblquote', '“').replace(
                '\\rdblquote', '”').replace('\\lquote', '‘').replace('\\rquote', '’')
            vText = vText.replace('\\emdash', '—').replace('\\endash', '–')
            # Now do Unicode characters
            while True:  # Find patterns like \\'d3
                match = re.search(r"\\'[0-9a-f][0-9a-f]", vText)
                if not match:
                    break
                i = int(vText[match.start() + 2:match.end()], 16)  # Convert two hex characters to decimal
                vText = vText[:match.start()] + chr(i) + vText[match.end():]
            while True:  # Find patterns like \\u253?
                match = re.search(r"\\u[1-2][0-9][0-9]\?", vText)
                if not match:
                    break
                i = int(vText[match.start() + 2:match.end() - 1])  # Convert three digits to decimal
                vText = vText[:match.start()] + chr(i) + vText[match.end():]

            # Handle the verse info
            #   (These messages used to reference an undefined bookCode variable -- we show the book number instead)
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").
                    format(self.givenName, BBB, bookNumber,
                           chapterNumberString, verseNumberString))
                continue
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                    '<') and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                      ).format(lastVerseNumber, verseNumber, self.givenName,
                               BBB, bookNumber, chapterNumberString,
                               verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").
                        format(verseNumber, self.givenName, BBB, bookNumber,
                               chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                          ).format(verseNumber, self.givenName, BBB,
                                   bookNumber, chapterNumberString,
                                   verseNumberString))

            # NOTE: Despite the wording of the warnings above, the verse line is still added
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (if we actually found any data lines)
    if thisBook is not None:
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single Unbound Bible source file and save each book found in it.

    Lines starting with '#' are meta-data lines.  A 'field<TAB>value' pair
    whose field name is recognised is stored as an attribute of that name
    on self; any other pair is reported with a warning.

    Every other non-blank line is split on tabs and must have 4, 6 or 9
    fields, the last of which is the verse text.  A new BibleBook is
    started whenever the book code changes, a 'c' line is added whenever
    the chapter number changes, and each verse is added as a 'v' line.

    Finishes by saving the last book (if any) and calling
    self.doPostLoadProcessing().
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    # Meta-data fields which are copied straight onto attributes of self
    # Should some of these be placed into self.settingsDict???
    knownMetadataFields = ('name', 'filetype', 'copyright', 'abbreviation',
                           'language', 'note', 'columns')
    # Modules exempted from the increasing-sequence-number check below -- Why???
    unorderedSequenceNames = ('gothic_latin', 'hebrew_bhs_consonants',
                              'hebrew_bhs_vowels', 'latvian_nt',
                              'ukrainian_1871',)

    BBB = thisBook = None
    # Pre-set so that the warning about a bad line can still be formatted
    # even if no good line has been seen yet
    bookCode = chapterNumberString = verseNumberString = None
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ''

    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            if line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if line[0] == '#':  # Meta-data or comment line
                hashBits = line[1:].split('\t')
                if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                    fieldName, fieldValue = hashBits
                    if fieldName in knownMetadataFields:
                        setattr(self, fieldName, fieldValue)
                    else:  # Only complain about the fields we don't know
                        logging.warning(
                            "Unknown UnboundBible meta-data field {!r} = {!r}".format(
                                fieldName, fieldValue))
                continue  # Just discard comment lines

            bits = line.split('\t')
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                (bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 9:
                (NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString,
                 bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 1 and self.givenName.startswith('lxx_a_parsing_'):
                # The reference shown is that of the previous good line (if any)
                logging.warning(
                    _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                        line, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            else:
                print("Unexpected number of bits", self.givenName, BBB,
                      bookCode, chapterNumberString, verseNumberString,
                      len(bits), bits)
                # NOTE(review): 'halt' is not defined in this block -- presumably
                # an undefined name used deliberately to abort; confirm.
                halt

            if NRSVA_bookCode:
                assert len(NRSVA_bookCode) == 3
            if NRSVA_chapterNumberString:
                assert NRSVA_chapterNumberString.isdigit()
            if NRSVA_verseNumberString:
                assert NRSVA_verseNumberString.isdigit()

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode, chapterNumberString,
                    verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert len(bookCode) == 3
                assert chapterNumberString.isdigit()
                assert verseNumberString.isdigit()

            if subverseNumberString:
                logging.warning(
                    _("subverseNumberString {!r} in {} {} {}:{}").format(
                        subverseNumberString, BBB, bookCode,
                        chapterNumberString, verseNumberString))

            vText = vText.strip()  # Remove leading and trailing spaces
            if not vText:
                continue  # Just ignore blank verses I think
            if vText == '+':
                continue  # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumberString.isdigit()
                sequenceNumber = int(sequenceNumberString)
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumber > lastSequence \
                        or self.givenName in unorderedSequenceNames
                lastSequence = sequenceNumber

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.saveBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'Unbound Bible Book object'
                thisBook.objectTypeString = 'Unbound'
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                continue  # An exact repeat of the previous verse is dropped
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') \
                    and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero (only for the ordering
                # checks -- the 'v' line below still uses verseNumberString)
                verseNumber = 0
            # NOTE: despite the wording of these warnings, the verse line is
            # still added below
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB,
                        bookCode, chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there isn't one if the file had no verse lines)
    if thisBook is not None:
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single SwordSearcher Forge source file and stash each book found.

    The file is read line by line:
      * Lines starting with ';' are header/comment lines.  Recognised
        headers are collected into a settings dictionary; others are
        reported with a warning.
      * Lines starting with '$$ ' are pointers.  '$$ {name}' starts a block
        of named metadata; anything else is taken to be a 'Book C:V'
        reference for the text on the following line(s).
      * All other lines are either metadata contents (inside a metadata
        block) or verse text.  In verse text, <scripref> elements become
        \\x cross-references, {…} becomes a \\f footnote, […] becomes \\add
        and +r/…-r/ becomes \\wj.

    Collected settings are placed in self.suppliedMetadata['Forge4SS'] and
    applied, then self.doPostLoadProcessing() is called.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    # The module-level organisational systems are only created on first use
    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganizationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # (line prefix, index where the value starts, settings key) -- first match wins
    # NOTE(review): '; HAS FOOTNOTES:' is 16 characters but its value is taken
    #   from index 15 (so it keeps the colon), and '; HAS FOOTNOTES' and
    #   '; HAS REDLETTER' are 15 characters but use index 14 (so the value
    #   starts with the final letter of the prefix).  These offsets are kept
    #   exactly as they were -- confirm what was intended before changing them.
    headerFields = (
        ('; TITLE:', 8, 'TITLE'),
        ('; ABBREVIATION:', 15, 'ABBREVIATION'),
        ('; HAS ITALICS', 14, 'HAS_ITALICS'),
        ('; HAS FOOTNOTES:', 15, 'HAS_FOOTNOTES'),
        ('; HAS FOOTNOTES', 14, 'HAS_FOOTNOTES'),
        ('; HAS REDLETTER', 14, 'HAS_REDLETTER'),
    )
    # Close up numbered book names so the book code contains no space
    # (applied in this order)
    pointerAdjustments = (
        ('1 K', '1K'), ('2 K', '2K'),
        ('1 Chr', '1Chr'), ('2 Chr', '2Chr'),
        ('1 Cor', '1Cor'), ('2 Cor', '2Cor'),
        ('1 Thess', '1Thess'), ('2 Thess', '2Thess'),
        ('1 Tim', '1Tim'), ('2 Tim', '2Tim'),
        ('1 Pet', '1Pet'), ('2 Pet', '2Pet'),
        ('1 J', '1J'), ('2 J', '2J'), ('3 J', '3J'),
    )
    # Book codes which are mapped directly rather than guessed
    fixedBookCodes = {'Ge': 'GEN', 'Le': 'LEV', 'La': 'LAM'}

    lineCount = 0
    bookCode = BBB = metadataName = thisBook = None
    metadataContents = ''
    # Pre-set so that text appearing before the first reference can't
    # cause a NameError when it is converted or reported
    chapterNumber = verseNumber = None
    chapterNumberString = verseNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    settingsDict = {}

    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == chr(65279):  # U+FEFF or \ufeff
                    logging.info(" ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)")
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search(r'^; TITLE:\s', line)
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print("First line got type {!r} match from {!r}".format(
                            match.group(0), line))
                else:  # Not the expected first line, so skip over it
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print("ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                              .format(line, self.sourceFilepath))
                    # NOTE(review): 'halt' is not defined in this block --
                    # presumably an undefined name used deliberately to abort
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt
                    continue

            # Process header stuff
            if line.startswith(';'):
                for prefix, valueStart, settingName in headerFields:
                    if line.startswith(prefix):
                        string = line[valueStart:].strip()
                        if string:
                            settingsDict[settingName] = string
                        break
                else:  # Didn't match any of the known headers
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                continue  # Headers and comments contain no Bible text

            # Process the main segment
            if line.startswith('$$ '):
                # A new pointer always ends any metadata block in progress
                if metadataName and metadataContents:
                    settingsDict[metadataName] = metadataContents
                metadataName = None

                pointer = line[3:]
                if pointer and pointer[0] == '{' and pointer[-1] == '}':
                    metadataName = pointer[1:-1]
                    if metadataName:
                        metadataContents = ''
                else:  # let's assume it's a BCV reference
                    for oldText, newText in pointerAdjustments:
                        pointer = pointer.replace(oldText, newText)
                    B_CV_Bits = pointer.split(' ', 1)
                    if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                        bookCode, CVString = B_CV_Bits
                        chapterNumberString, verseNumberString = CVString.split(':')
                        chapterNumber = int(chapterNumberString)
                        verseNumber = int(verseNumberString)
                        if bookCode != lastBookCode:  # We've started a new book
                            if bookCode in fixedBookCodes:
                                BBB = fixedBookCodes[bookCode]
                            else:  # Try to guess, from the strictest system to the loosest
                                BBB = BOS66.getBBBFromText(bookCode)
                                if not BBB:
                                    BBB = BOS81.getBBBFromText(bookCode)
                                if not BBB:
                                    BBB = BOSx.getBBBFromText(bookCode)
                    else:
                        print("Unexpected number of bits", self.givenName, BBB,
                              bookCode, chapterNumberString, verseNumberString,
                              len(B_CV_Bits), B_CV_Bits)
                continue  # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName:  # We're inside a metadata block
                metadataContents += ('\n' if metadataContents else '') + line
                continue

            vText = line
            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = vText.replace('(<scripref>', '\\x - \\xt ').replace('</scripref>)', '\\x*')
            vText = vText.replace('<scripref>', '\\x - \\xt ').replace('</scripref>', '\\x*')

            # Convert {stuff} to footnotes
            match = re.search(r'\{(.+?)\}', vText)
            while match:
                footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                    chapterNumber, verseNumber, match.group(1))
                vText = vText[:match.start()] + footnoteText + vText[match.end():]
                match = re.search(r'\{(.+?)\}', vText)

            # Convert [stuff] to added fields
            match = re.search(r'\[(.+?)\]', vText)
            while match:
                addText = '\\add {}\\add*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[match.end():]
                match = re.search(r'\[(.+?)\]', vText)

            # Convert +r/This text is red-letter-r/ to wj fields
            match = re.search(r'\+r/(.+?)-r/', vText)
            while match:
                addText = '\\wj {}\\wj*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[match.end():]
                match = re.search(r'\+r/(.+?)-r/', vText)

            # Final check for unexpected remaining formatting (warn once per line)
            if any(badChar in vText for badChar in '{}[]/'):
                logging.warning(
                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                    .format(BBB, chapterNumberString, verseNumberString, vText))

            if not bookCode:  # No bookCode yet
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                    .format(line))
                continue

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                    # Forget it so that it can't be stashed again while
                    # lines of an unrecognised book are being skipped
                    thisBook = None
                if BBB:
                    if BBB in self:
                        logging.critical("Have duplicated {} book in {}".format(
                            BBB, self.givenName))
                    if BibleOrgSysGlobals.debugFlag:
                        assert BBB not in self
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    numChapters = len(BOSx.getNumVersesList(BBB))
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical(
                        "ForgeForSwordSearcherBible could not figure out {!r} book code"
                        .format(bookCode))
                    if BibleOrgSysGlobals.debugFlag:
                        halt

            if BBB:
                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        # Esther Greek might be an exception
                        assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                    if chapterNumber == 0:
                        logging.info("Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode, chapterNumberString,
                            verseNumberString))
                    elif chapterNumber > numChapters:
                        logging.error(
                            "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                            .format(self.givenName, BBB, bookCode,
                                    chapterNumberString, verseNumberString,
                                    numChapters))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                            self.givenName, BBB, bookCode, chapterNumberString,
                            verseNumberString))
                    continue  # An exact repeat of the previous verse is dropped
                # NOTE: despite the wording of these warnings, the verse line
                # is still added below
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                            lastVerseNumber, verseNumber, self.givenName, BBB,
                            bookCode, chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))

                # Check for paragraph markers
                if vText and vText[0] == '¶':
                    thisBook.addLine('p', '')
                    vText = vText[1:].lstrip()

                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata('Forge4SS')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load(self):
    """
    Load a single Unbound Bible source file and stash each book found in it.

    Lines starting with '#' are meta-data lines.  Every 'field<TAB>value'
    pair with a non-empty value is stored in self.suppliedMetadata['Unbound'].

    Every other non-blank line is split on tabs and must have 4, 6 or 9
    fields, the last of which is the verse text.  A new BibleBook is
    started whenever the book code changes, a 'c' line is added whenever
    the chapter number changes, and each verse is added as a 'v' line.

    Finishes by stashing the last book (if any), applying the 'Unbound'
    supplied metadata and calling self.doPostLoadProcessing().
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    self.suppliedMetadata['Unbound'] = {}

    # Modules exempted from the increasing-sequence-number check below -- Why???
    unorderedSequenceNames = ('gothic_latin', 'hebrew_bhs_consonants',
                              'hebrew_bhs_vowels', 'latvian_nt',
                              'ukrainian_1871',)

    BBB = thisBook = None
    # Pre-set so that the warning about a bad line can still be formatted
    # even if no good line has been seen yet
    bookCode = chapterNumberString = verseNumberString = None
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ''

    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            if line and line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if line[0] == '#':  # Meta-data or comment line
                hashBits = line[1:].split('\t')
                if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                    self.suppliedMetadata['Unbound'][hashBits[0]] = hashBits[1]
                continue  # Just discard comment lines

            bits = line.split('\t')
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                (bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 9:
                (NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString,
                 bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 1 and self.givenName.startswith('lxx_a_parsing_'):
                # The reference shown is that of the previous good line (if any)
                logging.warning(
                    _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                        line, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            else:
                print("Unexpected number of bits", self.givenName, BBB,
                      bookCode, chapterNumberString, verseNumberString,
                      len(bits), bits)
                # NOTE(review): 'halt' is not defined in this block -- presumably
                # an undefined name used deliberately to abort; confirm.
                halt

            if NRSVA_bookCode:
                assert len(NRSVA_bookCode) == 3
            if NRSVA_chapterNumberString:
                assert NRSVA_chapterNumberString.isdigit()
            if NRSVA_verseNumberString:
                assert NRSVA_verseNumberString.isdigit()

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode, chapterNumberString,
                    verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert len(bookCode) == 3
                assert chapterNumberString.isdigit()
                assert verseNumberString.isdigit()

            if subverseNumberString:
                logging.warning(
                    _("subverseNumberString {!r} in {} {} {}:{}").format(
                        subverseNumberString, BBB, bookCode,
                        chapterNumberString, verseNumberString))

            vText = vText.strip()  # Remove leading and trailing spaces
            if not vText:
                continue  # Just ignore blank verses I think
            if vText == '+':
                continue  # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumberString.isdigit()
                sequenceNumber = int(sequenceNumberString)
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumber > lastSequence \
                        or self.givenName in unorderedSequenceNames
                lastSequence = sequenceNumber

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'Unbound Bible Book object'
                thisBook.objectTypeString = 'Unbound'
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                continue  # An exact repeat of the previous verse is dropped
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') \
                    and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero (only for the ordering
                # checks -- the 'v' line below still uses verseNumberString)
                verseNumber = 0
            # NOTE: despite the wording of these warnings, the verse line is
            # still added below
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB,
                        bookCode, chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there isn't one if the file had no verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.applySuppliedMetadata('Unbound')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def load(self):
    """
    Load the compressed data file and import book elements.

    The whole file is read as bytes and walked through in order:
      * a 32-byte block which must start with 'EasyWorship Bible Text'
      * a 56-byte block starting with the module name
      * 66 book records, each holding an abbreviation, the chapter count,
        the verse count of each chapter, and the offset and length of
        that book's zlib-compressed text
      * a length-prefixed block which (if not empty) is decompressed and
        replaces the module name
      * the compressed book texts, followed by 16 trailing bytes.

    Each book's text is then decompressed and its 'C:V text' lines are
    added to a new BibleBook as 'c' and 'v' lines.

    Returns a dictionary of the file blocks that are not (fully)
    interpreted here, keyed 'block1', 'block2', 'block3', 'block3n'
    (only if that block was not empty), 'block4', 'block5' and 'block9'.
    """
    import zlib

    def readShortString(buffer, offset, maxLength=32):
        """
        Decode the bytes of buffer from offset up to (but not including)
            the first byte below 0x20, reading at most maxLength bytes.
        """
        chars = []
        for byteValue in buffer[offset:offset + maxLength]:
            if byteValue < 0x20:
                break
            chars.append(chr(byteValue))
        return ''.join(chars)
    # end of readShortString

    # Modules and books where the verses are known to be out of order
    knownVerseOrderProblems = {
        'rsv': ('EXO', 'HAG',),
        'gnt': ('ISA', 'ZEC', 'MRK',),
        'hcsb': ('SA2',),
        'msg': ('NUM', 'JDG', 'SA2', 'CH2', 'EZE', 'ACT',),
    }

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print(_("\nLoading {}…").format(self.sourceFilepath))
    with open(self.sourceFilepath, 'rb') as myFile:  # Automatically closes the file when done
        fileBytes = myFile.read()
    if BibleOrgSysGlobals.debugFlag:
        print(" {:,} bytes read".format(len(fileBytes)))

    keep = {}
    index = 0

    # Block 1: the file type header
    keep['block1'] = fileBytes[index:index + 32]
    hString = readShortString(fileBytes, index)
    if BibleOrgSysGlobals.debugFlag:
        print('block1b', hexlify(fileBytes[index + len(hString):index + 32]))
    # Skipped some (important?) binary here
    index += 32
    if BibleOrgSysGlobals.debugFlag:
        print('hString', repr(hString), index)
    assert hString == 'EasyWorship Bible Text'

    # Block 2: the module name
    keep['block2'] = fileBytes[index:index + 56]
    nString = readShortString(fileBytes, index)
    # Skipped some zeroes here
    index += 56
    if BibleOrgSysGlobals.debugFlag:
        print('nString', repr(nString), index)
    self.name = nString

    # The 66 book records
    rawBooks = []
    for bookNumber in range(1, 66 + 1):
        bookAbbrev = readShortString(fileBytes, index)
        # Skipped some zeroes here
        index += 51
        if bookAbbrev and bookAbbrev[-1] == '.':
            bookAbbrev = bookAbbrev[:-1]  # Remove final period
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print('bookAbbrev', repr(bookAbbrev))

        # One byte for the chapter count, then one byte per chapter
        numChapters = fileBytes[index]
        numVerses = [fileBytes[index + j + 1] for j in range(numChapters)]
        # Skipped some zeroes here
        index += 157
        if BibleOrgSysGlobals.debugFlag:
            print(' ', numChapters, numVerses)

        # Only the first four bytes of each 8-byte field are decoded
        bookStart, = struct.unpack("<I", fileBytes[index:index + 4])
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag:
            print(' bookStart', bookStart)
        bookLength, = struct.unpack("<I", fileBytes[index:index + 4])
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag:
            print(' bookLength', bookLength, bookStart + bookLength)

        bookBytes = fileBytes[bookStart:bookStart + bookLength]
        assert bookBytes[0] == 0x78 and bookBytes[1] == 0xda  # Zlib compression header
        rawBooks.append((bookAbbrev, numChapters, numVerses,
                         bookStart, bookLength, bookBytes))

    # Block 3: starts with its own length (which includes the length field)
    if BibleOrgSysGlobals.debugFlag:
        print('unknown block3', index, hexlify(fileBytes[index:index + 30]))
    keep['block3'] = fileBytes[index:index + 30]
    length3, = struct.unpack("<I", fileBytes[index:index + 4])
    if length3:
        block3 = fileBytes[index + 4:index + 4 + length3 - 4]
        byteResult = zlib.decompress(block3)
        textResult = byteResult.decode('utf8')
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print("Got", len(textResult), textResult, 'from', length3)
        keep['block3n'] = textResult
        if self.name:
            print('Overwriting module name {!r} with {!r}'.format(
                self.name, textResult))
        self.name = textResult
    index += length3
    if BibleOrgSysGlobals.debugFlag:
        print('end of contents', index, hexlify(fileBytes[index:index + 60]))

    # Whatever lies between here and the start of the first book
    keep['block4'] = rawBooks[0][3]
    block5 = fileBytes[index:rawBooks[0][3]]
    keep['block5'] = block5
    index += len(block5)
    #if self.abbreviation in ( 'TB', ): # Why don't the others work
    assert index == rawBooks[0][3]  # Should now be at the start of the first book (already fetched above)
    assert len(rawBooks) == 66

    # Look at extra stuff at end (after the last book that was read)
    endBytes = fileBytes[bookStart + bookLength:]
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print('endBytes', len(endBytes), hexlify(endBytes), endBytes)
    assert len(endBytes) == 16
    keep['block9'] = endBytes
    # Skipped some binary and some text here

    del fileBytes  # The book bytes have already been copied into rawBooks

    # Now we have to decode the book text (compressed about 4x with zlib)
    for j, BBB in enumerate(BOS.getBookList()):
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(' Decoding {}…'.format(BBB))
        bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
        byteResult = zlib.decompress(bookBytes)
        textResult = byteResult.decode('utf8')
        if '\t' in textResult:
            logging.warning("Replacing tab characters in {} = {}".format(
                BBB, bookAbbrev))
            textResult = textResult.replace('\t', ' ')
        if BibleOrgSysGlobals.strictCheckingFlag:
            # Check for doubled spaces -- single spaces must be allowed
            # because every line is split on a space below
            assert '  ' not in textResult

        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'EasyWorship Bible Book object'
        thisBook.objectTypeString = 'EasyWorship Bible'
        if bookAbbrev:
            thisBook.addLine('toc3', bookAbbrev)

        C, V = '0', '-1'  # So id line starts at 0:0
        for line in textResult.split('\r\n'):
            if not line:
                continue  # skip blank lines
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print('Processing {} {} line: {!r}'.format(
                    self.abbreviation, BBB, line))
            assert line[0].isdigit()
            assert ':' in line[:4]
            CV, verseText = line.split(' ', 1)
            newC, newV = CV.split(':')

            if newC != C:  # We've started a new chapter
                if self.abbreviation == 'hcsb' and BBB in ('SA2',):
                    # Handle a bad bug -- chapter 24 has verses out of order
                    print("Skipping error for out-of-order chapters in {}!".format(BBB))
                else:
                    assert int(newC) > int(C)
                C, V = newC, '0'
                thisBook.addLine('c', C)

            if self.abbreviation == 'TB' and BBB == 'JOL':
                # Handle a bug -- chapter 3 repeats
                if int(newV) < int(V):
                    break  # Drop the rest of this book
            elif BBB in knownVerseOrderProblems.get(self.abbreviation, ()):
                # Handle a bug -- these books have verses out of order
                print("Skipping error for out-of-order verses in {} {}".format(
                    self.abbreviation, BBB))
            else:
                try:
                    assert int(newV) > int(V)
                except ValueError:
                    if BibleOrgSysGlobals.debugFlag:
                        print("Something's not an integer around {} {}:{} {}"
                              .format(BBB, C, V, verseText))
            V = newV
            thisBook.addLine('v', V + ' ' + verseText)

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("Saving", BBB)
        self.stashBook(thisBook)

    self.doPostLoadProcessing()
    return keep
def load(self):
    """
    Load a single source file and load book elements.

    Every non-blank line of self.sourceFilepath is split on tabs and handled
    according to its first field: 'info', 'book_name', 'verse', 'pericope',
    'parallel', 'xref' or 'footnote'.  Verse text is first collected per book;
    afterwards each book is built (with headings, footnotes and
    cross-references merged into the verses) and passed to self.saveBook().

    Side effects: sets self.name from the 'shortName' info record, saves one
    BibleBook per book found, and finally calls self.doPostLoadProcessing().

    Raises:
        AssertionError: if a record has an unexpected number of fields, or
            text still contains an '@' code after decoding.
        NameError: from the bare `halt` statements -- these look like this
            project's deliberate way of stopping on unexpected input.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    def decodeVerse(encodedVerseString):
        """
        Decodes the verse which has @ format codes.

        Paragraph markers come back prefixed with TWO backslash characters
            and character markers with ONE, so that the two kinds can be
            told apart when the verse is split up later.
        Footnote/cross-reference callers become \\ffN and \\xxN placeholders.
        """
        verseString = encodedVerseString
        # A leading @@ simply means that encoding follows (stripped at most twice)
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        # Paragraph markers (marked now with double backslash)
        # NOTE: @4 used to be given a single backslash, unlike all the others,
        #   so a q4 paragraph was never split off from the verse text
        for code, marker in (('@^', 'p'), ('@0', 'm'), ('@1', 'q1'),
                             ('@2', 'q2'), ('@3', 'q3'), ('@4', 'q4'),
                             ('@8', 'm')):
            verseString = verseString.replace(code, '\\\\' + marker + ' ')
        # Character markers (marked now with single backslash)
        verseString = verseString.replace('@6', '\\wj ').replace('@5', '\\wj*')
        verseString = verseString.replace('@9', '\\add ').replace('@7', '\\add*')  # or \\i ???
        verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
        verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
        assert '@' not in verseString
        return verseString
    # end of decodeVerse

    def checkedBBB(bookNumberString, *otherNumberStrings):
        """
        Return the BBB for the given book number string.

        In debug mode, first checks that all the given fields are digits.
        """
        if BibleOrgSysGlobals.debugFlag:
            assert bookNumberString.isdigit()
            for numberString in otherNumberStrings:
                assert numberString.isdigit()
        return BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
            bookNumberString)
    # end of checkedBBB

    def splitParagraphMarker(text):
        """
        Split 'p rest' or 'q1 rest' into (marker, rest).

        Only one- and two-character markers are expected here.
        """
        if text[1] == ' ':
            return text[0], text[2:]
        if text[2] == ' ':
            return text[0:2], text[3:]
        halt  # Unexpected marker length
    # end of splitParagraphMarker

    # Read all the lines into bookDict
    bookNameDict, bookDict = OrderedDict(), OrderedDict()
    footnoteDict, xrefDict, headingDict = {}, {}, {}
    BBB = bookNumberString = chapterNumberString = verseNumberString = ''
    lastBBB = None
    bookLines = None
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            if line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            bits = line.split('\t')
            recordType = bits[0]

            if recordType == 'info':
                assert len(bits) == 3
                fieldName, fieldValue = bits[1], bits[2]
                if fieldName == 'shortName':
                    self.name = fieldValue
                elif fieldName in ('longName', 'description'):
                    pass  # Recognised, but nothing is stored for these
                elif fieldName == 'locale':
                    assert 2 <= len(fieldValue) <= 3  # Validated but not stored
                else:
                    # (This message used to reference an undefined bookCode variable)
                    logging.warning(
                        _("YETBible: unknown {} info field in {} {} {}:{}")
                        .format(repr(fieldName), BBB, bookNumberString,
                                chapterNumberString, verseNumberString))

            elif recordType == 'book_name':
                assert 3 <= len(bits) <= 4
                thisBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bits[1])
                # The second name is optional
                bookNameDict[thisBBB] = bits[2], (bits[3] if len(bits) == 4 else '')

            elif recordType == 'verse':
                assert len(bits) == 5
                bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                BBB = checkedBBB(bookNumberString, chapterNumberString,
                                 verseNumberString)
                if BBB != lastBBB:  # We have a new book
                    if lastBBB is not None:  # We have a completed book to save
                        bookDict[lastBBB] = bookLines
                    assert BBB in bookNameDict
                    bookLines = OrderedDict()  # Keys are (C,V) strings
                # Just store it for now
                bookLines[(chapterNumberString, verseNumberString)] = decodeVerse(
                    encodedVerseString)
                lastBBB = BBB

            elif recordType == 'pericope':
                assert len(bits) == 5
                bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                BBB = checkedBBB(bookNumberString, chapterNumberString,
                                 verseNumberString)
                headingString = encodedHeadingString.replace(
                    '@9', '\\it ').replace('@7', '\\it*')
                assert '@' not in headingString
                # Start with a blank refList (filled by any following parallel lines)
                headingDict[(BBB, chapterNumberString, verseNumberString)] = headingString, []

            elif recordType == 'parallel':  # These lines optionally follow pericope lines
                assert len(bits) == 2
                # BBB/chapter/verse are still those of the most recent record
                refList = headingDict[(BBB, chapterNumberString, verseNumberString)][1]
                refList.append(bits[1])

            elif recordType == 'xref':
                assert len(bits) == 6
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                BBB = checkedBBB(bookNumberString, chapterNumberString,
                                 verseNumberString, indexNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace(
                    '@7', '\\it*')
                noteString = re.sub(r'@<ta(.+?)@>', r'', noteString)  # Get rid of these encoded BCV references for now
                noteString = re.sub(r'@<to(.+?)@>', r'', noteString)  # Get rid of these OSIS BCV references for now
                noteString = noteString.replace('@/', '')
                assert '@' not in noteString
                xrefDict[(BBB, chapterNumberString, verseNumberString,
                          indexNumberString)] = noteString

            elif recordType == 'footnote':
                assert len(bits) == 6
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                BBB = checkedBBB(bookNumberString, chapterNumberString,
                                 verseNumberString, indexNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace(
                    '@7', '\\it*')
                assert '@' not in noteString
                footnoteDict[(BBB, chapterNumberString, verseNumberString,
                              indexNumberString)] = noteString

            else:
                # (This message used to reference an undefined bookCode variable)
                print("YETBible: Unknown line type", self.givenName, BBB,
                      bookNumberString, chapterNumberString,
                      verseNumberString, len(bits), bits)
                halt

    if lastBBB is not None:  # Might have been no verse lines at all
        bookDict[lastBBB] = bookLines  # Save the last book

    # Now process the books
    for BBB, bkData in bookDict.items():
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "YET Bible Book object"
        thisBook.objectTypeString = "YET"
        lastChapterNumberString = None
        for (chapterNumberString, verseNumberString), verseString in bkData.items():
            CVKey = (BBB, chapterNumberString, verseNumberString)

            # Insert headings (can only occur before verses)
            if CVKey in headingDict:
                heading, refList = headingDict[CVKey]
                thisBook.addLine('s', heading)
                if refList:
                    thisBook.addLine('r', '(' + '; '.join(refList) + ')')

            # Insert footnotes and cross-references
            #   (the single digit after the placeholder is the note index)
            while '\\ff' in verseString:
                fIx = verseString.index('\\ff')
                caller = verseString[fIx + 3]
                assert caller.isdigit()
                note = footnoteDict[CVKey + (caller,)]
                verseString = (verseString[:fIx] + '\\f + \\ft ' + note +
                               '\\f*' + verseString[fIx + 4:])
            while '\\xx' in verseString:
                fIx = verseString.index('\\xx')
                caller = verseString[fIx + 3]
                assert caller.isdigit()
                note = xrefDict[CVKey + (caller,)]
                verseString = (verseString[:fIx] + '\\x - \\xt ' + note +
                               '\\x*' + verseString[fIx + 4:])

            # Save the Bible data fields
            if chapterNumberString != lastChapterNumberString:
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumberString = chapterNumberString
            if verseString.startswith('\\\\'):  # It's an initial paragraph marker
                marker, verseString = splitParagraphMarker(verseString[2:])
                thisBook.addLine(marker, '')
            assert not verseString.startswith('\\\\')
            # Split on paragraph markers (but not character markers)
            bits = verseString.split('\\\\')
            for j, bit in enumerate(bits):
                if j == 0:
                    # Only the text BEFORE the first paragraph marker belongs
                    #   on the verse line (the whole verseString used to be
                    #   written here, duplicating all of the later pieces)
                    thisBook.addLine('v', verseNumberString + ' ' + bit.rstrip())
                else:
                    marker, bit = splitParagraphMarker(bit)
                    thisBook.addLine(marker, bit.rstrip())
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    The first non-blank line determines the VPL type:
        1: lines like 'BookCode C:V text' (space separated)
        2: lines starting with an 8-digit BBCCCVVV reference
        3: as type 2 for the verse lines, but the file begins with
            '# field: value' header lines (starting with '# language_name:')
    Each verse line is then added to a BibleBook, and each completed book
    is passed to self.stashBook().

    Side effects: creates the module-level BOS66/BOS81/BOSx systems if they
    don't exist yet, stores any type-3 header fields in
    self.suppliedMetadata['VPL'] (and applies them), and finally calls
    self.doPostLoadProcessing().

    Raises:
        NameError: from the bare `halt` statements -- these look like this
            project's deliberate way of stopping on unexpected input.
        AssertionError: from the consistency checks made in debug mode.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganizationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Type-3 header line prefixes and the settingsDict keys they are saved under
    headerFields = (
        ('# language_name:', 'LanguageName'),
        ('# closest ISO 639-3:', 'ISOLanguageCode'),
        ('# year_short:', 'Year.short'),
        ('# year_long:', 'Year.long'),
        ('# title:', 'WorkTitle'),
        ('# URL:', 'URL'),
        ('# copyright_short:', 'Copyright.short'),
        ('# copyright_long:', 'Copyright.long'),
    )
    # Book numbers above 66 used by types 2 and 3 (built once, not per book)
    bnDict = {
        67: 'TOB', 68: 'JDT', 69: 'ESG', 70: 'WIS', 71: 'SIR',
        72: 'BAR', 73: 'LJE', 74: 'PAZ', 75: 'SUS', 76: 'BEL',
        77: 'MA1', 78: 'MA2', 79: 'MA3', 80: 'MA4', 81: 'ES1',
        82: 'ES2', 83: 'MAN', 84: 'PS2', 85: 'PSS', 86: 'ODE',
    }
    # Spacing around punctuation that gets closed up in type 2 and 3 text
    spacingFixes = (
        (' ,', ','), (' .', '.'), (' ;', ';'), (' :', ':'),
        (' !', '!'), (' )', ')'), (' ]', ']'), (' ”', '”'),
        ('“ ', '“'), ('( ', '('), ('[ ', '['),
    )

    vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = None
    typeDetectionDone = False
    # Pre-set so that diagnostics can't hit unbound names on a bad first line
    chapterNumberString = verseNumberString = ''
    lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for lineCount, line in enumerate(myFile, start=1):
            if line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if (lineCount == 1 and self.encoding.lower() == 'utf-8'
                    and line.startswith(chr(65279))):  # U+FEFF or \ufeff
                logging.info(
                    " VPLBible.load: Detected Unicode Byte Order Marker (BOM)")
                line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
            if not line:
                continue  # Just discard blank lines

            if not typeDetectionDone:
                # Try to identify the VPL type from the first non-blank line
                #   (a blank first line used to prevent any detection at all)
                typeDetectionDone = True
                match = re.search(
                    '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line)
                if match:
                    vplType = 1
                else:
                    match = re.search('^(\\d{8})\\s', line)
                    if match:
                        vplType = 2
                    else:
                        match = re.search('^# language_name:\\s', line)
                        if match:
                            vplType = 3
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print("First line got type #{} {!r} match from {!r}".
                              format(vplType, match.group(0), line))
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print(
                            "VPLBible.load: (unexpected) first line was {!r} in {}"
                            .format(line, self.sourceFilepath))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt
                    continue  # Only an unrecognised first line is skipped

            # Process header stuff
            if vplType == 3 and line[0] == '#':
                for prefix, settingName in headerFields:
                    if line.startswith(prefix):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available':
                            settingsDict[settingName] = string
                        break
                else:  # Not one of the known header fields
                    logging.warning(
                        "VPLBible.load {} is skipping unknown line: {}".
                        format(vplType, line))
                continue  # Header and comment lines hold no verse data

            # Process the main segment
            psalmTitle = None  # Set if this verse line starts with a Psalm title
            if vplType == 1:
                bits = line.split(' ', 2)
                if len(bits) == 3 and ':' in bits[1]:
                    bookCodeText, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split(':')
                    if chapterNumberString == '':
                        chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCodeText, chapterNumberString,
                          verseNumberString, len(bits), bits)
                    continue  # Don't reprocess the previous line's fields

                if BibleOrgSysGlobals.debugFlag:
                    assert 2 <= len(bookCodeText) <= 4
                    assert chapterNumberString.isdigit()
                if not verseNumberString.isdigit():
                    logging.error(
                        "Invalid verse number field at {}/{} {}:{!r}".format(
                            bookCodeText, BBB, chapterNumberString,
                            verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert verseNumberString.isdigit()
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    # Ambiguous abbreviations are decided by the preceding book
                    if bookCodeText == 'Le' and lastBBB == 'GEN':
                        BBB = 'LEV'
                    elif bookCodeText in ('Jud', ) and lastBBB == 'JOS':
                        BBB = 'JDG'
                    else:  # Try to guess (stops at the first system that knows it)
                        BBB = (BOS66.getBBBFromText(bookCodeText)
                               or BOS81.getBBBFromText(bookCodeText)
                               or BOSx.getBBBFromText(bookCodeText)
                               or BibleOrgSysGlobals.BibleBooksCodes.
                               getBBBFromText(bookCodeText))
                        if not BBB:
                            logging.critical(
                                "VPL Bible: Unable to determine book code from text {!r} after {!r}={}"
                                .format(bookCodeText, lastBookCodeText,
                                        lastBBB))
                            halt

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons» are for the Titles in the Book of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if (vText and vText[0] == '«' and BBB == 'PSA'
                        and verseNumberString == '1'):  # Psalm title
                    title, closer, remainder = vText[1:].partition('»')
                    if closer:
                        # Only remember the title here: the book and the
                        #   chapter line for this verse don't exist yet
                        psalmTitle = title
                        vText = remainder.lstrip()
                    else:
                        logging.warning(
                            "VPLBible.load{} found unclosed Psalm title at {} {}:{}"
                            .format(vplType, BBB, chapterNumberString,
                                    verseNumberString))

                # NOTE(review): this test can't succeed here because every '<'
                #   was already replaced above -- kept unchanged
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '<') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2, 3):
                bits = line.split('\t', 1)
                if len(bits) != 2 or len(bits[0]) < 6 or not bits[0].isdecimal():
                    logging.error(
                        "VPLBible.load{} is skipping malformed line: {!r}".
                        format(vplType, line))
                    continue
                reference = bits[0]  # BBCCCVVV
                bookNumberString = reference[:2]
                # Remove leading zeroes (but leave a lone zero)
                chapterNumberString = reference[2:5].lstrip('0') or '0'
                verseNumberString = reference[5:].lstrip('0') or '0'
                bookCodeText = int(bookNumberString)
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                vText = bits[1]
                for spaced, closedUp in spacingFixes:
                    vText = vText.replace(spaced, closedUp)

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    if 1 <= bookCodeText <= 66:
                        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                            bookCodeText)
                    else:
                        # None for an unknown number (reported further down)
                        BBB = bnDict.get(bookCodeText)

            else:
                logging.critical('Unknown VPL type {}'.format(vplType))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    halt

            if not bookCodeText:  # No bookCodeText yet
                logging.warning(
                    "VPLBible.load{} is skipping unknown pre-book line: {}"
                    .format(vplType, line))
                continue

            if bookCodeText != lastBookCodeText:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                    thisBook = None  # So that it can't be stashed twice
                lastBookCodeText = bookCodeText
                if BBB:
                    if BBB in self:
                        logging.critical(
                            "Have duplicated {} book in {}".format(
                                BBB, self.givenName))
                    if BibleOrgSysGlobals.debugFlag:
                        assert BBB not in self
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'VPL Bible Book object'
                    thisBook.objectTypeString = 'VPL'
                    numChapters = len(BOSx.getNumVersesList(BBB))
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical(
                        "VPLBible{} could not figure out {!r} book code"
                        .format(vplType, bookCodeText))
                    if BibleOrgSysGlobals.debugFlag:
                        halt

            if not BBB:
                continue  # Can't store lines for a book that we don't know

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCodeText,
                            chapterNumberString, verseNumberString))
                elif chapterNumber > numChapters:
                    logging.error(
                        "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                        .format(self.givenName, BBB, bookCodeText,
                                chapterNumberString, verseNumberString,
                                numChapters))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}"
                      ).format(self.givenName, BBB, bookCodeText,
                               chapterNumberString, verseNumberString))
                continue
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                      ).format(lastVerseNumber, verseNumber, self.givenName,
                               BBB, bookCodeText, chapterNumberString,
                               verseNumberString))
            elif verseNumber == lastVerseNumber:
                # (The text must differ here -- identical lines were skipped above)
                logging.warning(
                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                      ).format(verseNumber, self.givenName, BBB,
                               bookCodeText, chapterNumberString,
                               verseNumberString))

            if psalmTitle is not None:
                # Now it goes into the right book, after its chapter line
                thisBook.addLine('d', psalmTitle)

            # Check for paragraph markers
            if vText and vText[0] == '¶':
                thisBook.addLine('p', '')
                vText = vText[1:].lstrip()

            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load(self):
    """
    Load a single Unbound Bible source file and build the book objects.

    The file at self.sourceFilepath is read line by line using self.encoding.
    Blank lines are discarded.  Lines starting with '#' are treated as
    comment/metadata lines: when such a line holds exactly two tab-separated
    fields and the second is non-empty, the pair is stored in
    self.suppliedMetadata["Unbound"].

    Every other line is split on tabs and must contain 4, 6 or 9 fields:
        4: bookCode, chapter, verse, text
        6: bookCode, chapter, verse, subverse, sequence number, text
        9: NRSVA bookCode, NRSVA chapter, NRSVA verse, followed by the six
           fields above

    A new BibleBook is started whenever the book code changes; the previous
    book (and the final one, if any) is handed to self.stashBook().  After
    the file is read, the collected metadata is applied and
    self.doPostLoadProcessing() is called.

    Raises:
        ValueError: if a data line has an unexpected number of fields, or if
            a chapter/verse/sequence field cannot be converted by int().
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    self.suppliedMetadata["Unbound"] = {}

    lineCount = 0
    # Initialised up front so that diagnostics for a malformed first line,
    #   and the final stash for a file with no verse lines, cannot hit an
    #   unbound local.
    BBB = thisBook = None
    bookCode = chapterNumberString = verseNumberString = None
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ""
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == "\n":
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if line[0] == "#":
                hashBits = line[1:].split("\t")
                if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                    self.suppliedMetadata["Unbound"][hashBits[0]] = hashBits[1]
                continue  # Nothing more to do with comment lines

            bits = line.split("\t")
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                (bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 9:
                (NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString,
                 bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 1 and self.givenName.startswith("lxx_a_parsing_"):
                # The reference values reported here are those of the previous line (if any)
                logging.warning(
                    _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                        line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                    )
                )
                continue
            else:
                print(
                    "Unexpected number of bits",
                    self.givenName,
                    BBB,
                    bookCode,
                    chapterNumberString,
                    verseNumberString,
                    len(bits),
                    bits,
                )
                # Abort explicitly rather than via an undefined name
                raise ValueError(
                    "Unexpected number of fields ({}) at line {} of {}".format(
                        len(bits), lineCount, self.sourceFilepath
                    )
                )

            if NRSVA_bookCode:
                assert len(NRSVA_bookCode) == 3
            if NRSVA_chapterNumberString:
                assert NRSVA_chapterNumberString.isdigit()
            if NRSVA_verseNumberString:
                assert NRSVA_verseNumberString.isdigit()

            if not bookCode and not chapterNumberString and not verseNumberString:
                print(
                    "Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                    )
                )
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert len(bookCode) == 3
                assert chapterNumberString.isdigit()
                assert verseNumberString.isdigit()

            if subverseNumberString:
                logging.warning(
                    _("subverseNumberString {!r} in {} {} {}:{}").format(
                        subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString
                    )
                )

            vText = vText.strip()  # Remove leading and trailing spaces
            if not vText:
                continue  # Just ignore blank verses I think
            if vText == "+":
                continue  # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)
            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumberString.isdigit()
                sequenceNumber = int(sequenceNumberString)
                if BibleOrgSysGlobals.debugFlag:
                    # Sequence numbers are expected to increase, except in these named texts
                    assert sequenceNumber > lastSequence or self.givenName in (
                        "gothic_latin",
                        "hebrew_bhs_consonants",
                        "hebrew_bhs_vowels",
                        "latvian_nt",
                        "ukrainian_1871",
                    )  # Why???
                lastSequence = sequenceNumber

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "Unbound Bible Book object"
                thisBook.objectTypeString = "Unbound"
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumber > lastChapterNumber or BBB == "ESG"  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                thisBook.addLine("c", chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                # An exact repeat of the previous verse line is the only case that is dropped
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                    )
                )
                continue
            if (
                BBB == "PSA"
                and verseNumberString == "1"
                and vText.startswith("<")
                and self.givenName == "basic_english"
            ):  # Move Psalm titles to verse zero
                # NOTE(review): only the tracked number changes; the line written
                #   below still uses verseNumberString -- confirm that is intended.
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber,
                        verseNumber,
                        self.givenName,
                        BBB,
                        bookCode,
                        chapterNumberString,
                        verseNumberString,
                    )
                )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )

            # Despite the wording of the warnings above, the verse is still added
            thisBook.addLine("v", verseNumberString + " " + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there is none if the file held no verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)

    self.applySuppliedMetadata("Unbound")  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def load(self):
    """
    Load all the books out of the SQLite3 database.

    The 'OT' and 'NT' flags in self.suppliedMetadata['MySword'] select the
    first book and how many books to attempt (66 from GEN, 39 from GEN, or
    27 from MAT).  For each book, every chapter:verse reference listed by
    self.BibleOrganisationalSystem.getNumVersesList() is queried through
    self.cursor and the result is passed to handleRTFLine().  A book is
    stashed via self.stashBook() only if at least one non-blank text line
    was found for it.

    Loading stops early if a non-string verse value is found while the
    metadata contains an 'encryption' entry.  Finally the cursor is closed
    and removed, the metadata is applied, and self.doPostLoadProcessing()
    is called.

    Raises:
        ValueError: if the metadata flags neither the OT nor the NT as present.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(exp("load()"))
    assert self.preloadDone

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
        BBB, booksExpected = 'GEN', 66
    elif self.suppliedMetadata['MySword']['OT']:
        BBB, booksExpected = 'GEN', 39
    elif self.suppliedMetadata['MySword']['NT']:
        BBB, booksExpected = 'MAT', 27
    else:
        # Fail with a clear message instead of an unbound-local error further down
        raise ValueError(
            "MySwordBible.load: metadata flags neither OT nor NT as present in {}".format(
                self.sourceFilepath))

    def startBook(bookCode):
        """Create an empty book for bookCode and look up its verse counts and reference number."""
        newBook = BibleBook(self, bookCode)
        newBook.objectNameString = 'MySword Bible Book object'
        newBook.objectTypeString = 'MySword'
        numVersesList = self.BibleOrganisationalSystem.getNumVersesList(bookCode)
        referenceNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(bookCode)
        return newBook, numVersesList, referenceNumber

    # Create the first book
    thisBook, verseList, nBBB = startBook(BBB)
    numC, numV = len(verseList), verseList[0]

    C = V = 1
    bookCount = 0
    ourGlobals = {'haveParagraph': False}  # State shared with handleRTFLine
    haveLines = False
    while True:
        self.cursor.execute(
            'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
            (nBBB, C, V))
        row = self.cursor.fetchone()
        line = None if row is None else row[0]  # row is None when this reference is missing

        if line is None:
            logging.warning(
                "MySwordBible.load: Have missing verse line at {} {}:{}".format(BBB, C, V))
        elif not isinstance(line, str):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical(
                    "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                    .format(BBB, C, V, line))
                break  # Give up on the whole module
            logging.critical(
                "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                .format(BBB, C, V, line, self.suppliedMetadata['MySword']))
        elif not line:
            logging.warning(
                "MySwordBible.load: Found blank verse line at {} {}:{}".format(BBB, C, V))
        else:
            haveLines = True
            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            while line and line[-1] in '\r\n':
                line = line[:-1]
            if '\r' in line or '\n' in line:  # (in the middle)
                logging.warning(
                    "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                    .format(BBB, C, V))
                line = line.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

        # Called for every reference, including missing ones (line is None).
        # NOTE(review): confirm handleRTFLine accepts None and non-str values.
        handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

        V += 1
        if V > numV:
            C += 1
            if C > numC:  # Finished this book
                if haveLines:  # Only save books that actually had text
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print(" MySword saving", BBB, bookCount + 1)
                    self.stashBook(thisBook)
                bookCount += 1  # Not the number saved but the number we attempted to process
                if bookCount >= booksExpected:
                    break
                BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                # Create the next book
                thisBook, verseList, nBBB = startBook(BBB)
                haveLines = False
                numC, numV = len(verseList), verseList[0]
                C = V = 1
            else:  # Next chapter only
                numV = verseList[C - 1]
                V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

    self.cursor.close()
    del self.cursor
    self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()