def __init__( self, name, BBB ):
    """
    Set up a USX Bible book.

    name and BBB are handed unchanged to BibleBook.__init__; afterwards the
    instance is labelled with its USX object name and type strings.
    """
    # The base class has to be initialised before the labels are attached
    BibleBook.__init__( self, name, BBB )
    self.objectNameString, self.objectTypeString = "USX XML Bible Book object", "USX"
def __init__( self, containerBibleObject, BBB ):
    """
    Set up a Go Bible book.

    containerBibleObject and BBB are handed unchanged to BibleBook.__init__;
    afterwards the instance is labelled with its Go Bible name and type
    strings.
    """
    # The base class has to be initialised before the labels are attached
    BibleBook.__init__( self, containerBibleObject, BBB )
    self.objectNameString, self.objectTypeString = 'Go Bible Book object', 'GoBible'
def __init__( self, containerBibleObject, BBB ):
    """
    Set up an ESFM Bible book.

    containerBibleObject and BBB are handed unchanged to BibleBook.__init__;
    afterwards the instance is labelled with its ESFM name and type strings.
    """
    # The base class has to be initialised before the labels are attached
    BibleBook.__init__( self, containerBibleObject, BBB )
    self.objectNameString, self.objectTypeString = "ESFM Bible Book object", "ESFM"
def __validateAndExtractBook(self, book):
    """
    Convert one OpenSong XML book element into a BibleBook and save it.

    The book name is taken from the element's "n" attribute and mapped to a
    book code with self.genericBOS.getBBB(). When the name is missing or is
    not recognised, an error is logged and nothing is saved. Otherwise id, h
    and mt1 lines are added, every chapter subelement is passed to
    __validateAndExtractChapter, and the finished book goes to
    self.saveBook().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating OpenSong XML book..."))

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrName, attrValue in book.items():
        if attrName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrName, attrValue))
            continue
        bookName = attrValue

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    BBB = self.genericBOS.getBBB(bookName)
    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: {!r}").format(
                bookName))
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Validating {} {}...").format(BBB, bookName))

    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = "OpenSong XML Bible Book object"
    thisBook.objectTypeString = "OpenSong"

    # Header lines: identification, running header and main title
    usfmAbbrev = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(BBB)
    idText = '{} imported by {}'.format(usfmAbbrev.upper(), ProgNameVersion)
    thisBook.addLine('id', idText)
    thisBook.addLine('h', bookName)
    thisBook.addLine('mt1', bookName)

    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.chapterTag, child.tag))
            continue
        where = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, where, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, where, 'al1d')
        self.__validateAndExtractChapter(BBB, thisBook, child)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results...".format(BBB))
    self.saveBook(thisBook)
def __validateAndExtractBook(self, book, bookNumber):
    """
    Convert one VerseView XML book element into a BibleBook and stash it.

    The book name comes from the element's "n" attribute. It is looked up
    with self.genericBOS.getBBBFromText(), first as given and then, if that
    returns None, with accents removed. The result is cross-checked against
    the book code derived from bookNumber; when the two disagree the
    number-derived code wins. If no book code results, nothing is stashed.

    Parameters:
        book: the XML book element (iterated for chapters, .items() for
            attributes).
        bookNumber: reference number of the book, passed to
            BibleBooksCodes.getBBBFromReferenceNumber().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating XML book…"))

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrib, value in book.items():
        if attrib == "n":
            bookName = value
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrib, value))

    BBB = None
    if bookName:
        BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            # Retry without accents. This fallback is only attempted when a
            # name exists, so removeAccents() is never handed None.
            adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText(adjustedBookName)

    # Just double check using the book number -- it overrides the name
    BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
        bookNumber)
    if BBB2 != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print("Assuming that book {} {!r} is {} (not {})".format(
                bookNumber, bookName, BBB2, BBB))
        BBB = BBB2

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Validating {} {}…").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'VerseView XML Bible Book object'
    thisBook.objectTypeString = 'VerseView'

    for element in book:
        if element.tag == VerseViewXMLBible.chapterTag:
            sublocation = "chapter in {}".format(BBB)
            BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, element)
        else:
            logging.error(
                "vb26 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.chapterTag, element.tag))

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results…".format(BBB))
    self.stashBook(thisBook)
def __init__( self, containerBibleObject, BBB ):
    """
    Set up an ESFM Bible book.

    containerBibleObject and BBB are handed unchanged to BibleBook.__init__
    and the instance is labelled with its ESFM name and type strings. The
    first call also fills the module-level sortedNLMarkers with the
    'Combined' newline markers, ordered longest first.
    """
    global sortedNLMarkers

    # The base class has to be initialised before the labels are attached
    BibleBook.__init__( self, containerBibleObject, BBB )
    self.objectNameString, self.objectTypeString = 'ESFM Bible Book object', 'ESFM'

    if sortedNLMarkers is not None:
        return # The shared marker list was already built by an earlier instance
    newlineMarkers = BibleOrgSysGlobals.USFMMarkers.getNewlineMarkersList( 'Combined' )
    sortedNLMarkers = sorted( newlineMarkers, key=len, reverse=True )
def __validateAndExtractBook(self, book):
    """
    Convert one Zefania XML book element into a BibleBook and save it.

    The book is identified from its "bnumber" attribute when that is
    present, and only otherwise from its "bname" attribute. If no book code
    results (including when the number lookup raises KeyError, which is
    logged as a warning), nothing is saved. Each chapter subelement is
    passed to __validateAndExtractChapter.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating XML book..."))

    # Collect the three recognised attributes; anything else is reported
    recognised = {"bnumber": None, "bname": None, "bsname": None}
    for attrName, attrValue in book.items():
        if attrName in recognised:
            recognised[attrName] = attrValue
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrName, attrValue))
    bookNumber = recognised["bnumber"]
    bookName = recognised["bname"]
    bookShortName = recognised["bsname"]

    # The number takes priority; the name is consulted only without a number
    BBB = None
    if bookNumber:
        try:
            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                bookNumber)
        except KeyError:
            logging.warning(
                "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                .format(bookNumber, bookName, bookShortName))
    elif bookName:
        BBB = self.genericBOS.getBBB(bookName)

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Validating {} {}...").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = "Zefania XML Bible Book object"
    thisBook.objectTypeString = "Zefania"

    for child in book:
        if child.tag != ZefaniaXMLBible.chapterTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                ZefaniaXMLBible.chapterTag, child.tag))
            continue
        where = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, where, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, where, 'al1d')
        self.__validateAndExtractChapter(BBB, thisBook, child)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results...".format(BBB))
    self.saveBook(thisBook)
def __validateAndExtractBook( self, book ):
    """
    Convert one Haggai XML book element into a BibleBook and stash it.

    The book is identified from its "bnumber" attribute when that is
    present, and only otherwise from its "bname" attribute. If no book code
    results (including when the number lookup raises KeyError, which is
    logged as a warning), nothing is stashed. A caption subelement becomes
    an 'mt' line; each chapter subelement is passed to
    __validateAndExtractChapter.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

    # Collect the three recognised attributes; anything else is reported
    recognised = { "bnumber":None, "bname":None, "bsname":None }
    for attrName,attrValue in book.items():
        if attrName in recognised:
            recognised[attrName] = attrValue
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrName, attrValue ) )
    bookNumber = recognised["bnumber"]
    bookName = recognised["bname"]
    bookShortName = recognised["bsname"]

    # The number takes priority; the name is consulted only without a number
    BBB = None
    if bookNumber:
        try:
            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except KeyError:
            logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                .format( bookNumber, bookName, bookShortName ) )
    elif bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'Haggai XML Bible Book object'
    thisBook.objectTypeString = 'Haggai'

    for child in book:
        tag = child.tag
        if tag == HaggaiXMLBible.captionTag:
            where = "caption in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoAttributes( child, where, 'jhl6' )
            BibleOrgSysGlobals.checkXMLNoSubelements( child, where, 'jk21' )
            BibleOrgSysGlobals.checkXMLNoTail( child, where, 'kjh6' )
            thisBook.addLine( 'mt', child.text )
        elif tag == HaggaiXMLBible.chapterTag:
            where = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( child, where, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( child, where, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )
        else:
            logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, tag ) )

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def __validateAndExtractBook(self, book):
    """
    Convert one OpenSong XML book element into a BibleBook and save it.

    The book name is taken from the element's "n" attribute and mapped to a
    book code with self.genericBOS.getBBB(). Each chapter subelement is
    passed to __validateAndExtractChapter and the finished book goes to
    self.saveBook().

    An error is logged, and nothing is saved, only when the name is missing
    or is not recognised. Those two messages are confined to their failure
    paths so that a successful import does not also report both errors.
    """
    if Globals.verbosityLevel > 3:
        print(_("Validating OpenSong XML book..."))

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrib, value in book.items():
        if attrib == "n":
            bookName = value
        else:
            logging.warning(
                "Unprocessed '{}' attribute ({}) in book element".format(
                    attrib, value))

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    BBB = self.genericBOS.getBBB(bookName)
    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: '{}'").format(
                bookName))
        return

    if Globals.verbosityLevel > 2:
        print(_("Validating {} {}...").format(BBB, bookName))
    thisBook = BibleBook(self.name, BBB)
    thisBook.objectNameString = "OpenSong XML Bible Book object"
    thisBook.objectTypeString = "OpenSong"

    for element in book:
        if element.tag == OpenSongXMLBible.chapterTag:
            sublocation = "chapter in {}".format(BBB)
            Globals.checkXMLNoText(element, sublocation, 'j3jd')
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, element)
        else:
            logging.error(
                "Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.chapterTag, element.tag))

    if Globals.verbosityLevel > 2:
        print(" Saving {} into results...".format(BBB))
    self.saveBook(thisBook)
def __validateAndExtractBook( self, book, bookNumber ):
    """
    Convert one VerseView XML book element into a BibleBook and stash it.

    The book name comes from the element's "n" attribute. It is looked up
    with self.genericBOS.getBBBFromText(), first as given and then, if that
    returns None, with accents removed. The result is cross-checked against
    the book code derived from bookNumber; when the two disagree the
    number-derived code wins. If no book code results, nothing is stashed.

    Parameters:
        book: the XML book element (iterated for chapters, .items() for
            attributes).
        bookNumber: reference number of the book, passed to
            BibleBooksCodes.getBBBFromReferenceNumber().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrib,value in book.items():
        if attrib=="n":
            bookName = value
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

    BBB = None
    if bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None:
            # Retry without accents. This fallback is only attempted when a
            # name exists, so removeAccents() is never handed None.
            adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText( adjustedBookName )

    # Just double check using the book number -- it overrides the name
    BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
    if BBB2 != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
        BBB = BBB2

    if not BBB:
        return

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'VerseView XML Bible Book object'
    thisBook.objectTypeString = 'VerseView'

    for element in book:
        if element.tag == VerseViewXMLBible.chapterTag:
            sublocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )
        else:
            logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def __validateAndExtractBook( self, book ):
    """
    Convert one Zefania XML book element into a BibleBook and save it.

    The book is identified from its "bnumber" attribute when that is
    present, and only otherwise from its "bname" attribute. If no book code
    results (including when the number lookup raises KeyError, which is
    logged as a warning), nothing is saved. Each chapter subelement is
    passed to __validateAndExtractChapter.
    """
    if Globals.verbosityLevel > 3: print( _("Validating XML book...") )

    # Collect the three recognised attributes; anything else is reported
    recognised = { "bnumber":None, "bname":None, "bsname":None }
    for attrName,attrValue in book.items():
        if attrName in recognised:
            recognised[attrName] = attrValue
        else:
            logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrName, attrValue ) )
    bookNumber = recognised["bnumber"]
    bookName = recognised["bname"]
    bookShortName = recognised["bsname"]

    # The number takes priority; the name is consulted only without a number
    BBB = None
    if bookNumber:
        try:
            BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except KeyError:
            logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                .format( bookNumber, bookName, bookShortName ) )
    elif bookName:
        BBB = self.genericBOS.getBBB( bookName )

    if not BBB:
        return

    if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
    thisBook = BibleBook( self.name, BBB )
    thisBook.objectNameString = "Zefania XML Bible Book object"
    thisBook.objectTypeString = "Zefania"

    for child in book:
        if child.tag != ZefaniaXMLBible.chapterTag:
            logging.error( "Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.chapterTag, child.tag ) )
            continue
        where = "chapter in {}".format( BBB )
        Globals.checkXMLNoText( child, where, 'j3jd' )
        Globals.checkXMLNoTail( child, where, 'al1d' )
        self.__validateAndExtractChapter( BBB, thisBook, child )

    if Globals.verbosityLevel > 2: print( " Saving {} into results...".format( BBB ) )
    self.saveBook( thisBook )
def __validateAndExtractBook( self, book ):
    """
    Convert one OpenSong XML book element into a BibleBook and stash it.

    The book name is taken from the element's "n" attribute. It is looked up
    with self.genericBOS.getBBBFromText() first and, if that finds nothing,
    with the module-level BibleBooksNames (loaded on first need). When the
    name is missing or is not recognised, an error is logged and nothing is
    stashed. Otherwise id, h and mt1 lines are added, every chapter
    subelement is passed to __validateAndExtractChapter, and the finished
    book goes to self.stashBook().
    """
    global BibleBooksNames

    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book…") )

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrName,attrValue in book.items():
        if attrName != "n":
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrName, attrValue ) )
            continue
        bookName = attrValue

    if not bookName: # no bookName
        logging.error( _("OpenSong load can't find a book name") )
        return

    BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
    if not BBB: # wasn't English -- try non-English booknames
        if BibleBooksNames is None:
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        BBB = BibleBooksNames.getBBBFromText( bookName )

    if not BBB: # no BBB
        logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
        return

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'OpenSong XML Bible Book object'
    thisBook.objectTypeString = 'OpenSong'

    # Header lines: identification, running header and main title
    usfmAbbrev = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
    idText = '{} imported by {}'.format( usfmAbbrev.upper(), ProgNameVersion )
    thisBook.addLine( 'id', idText )
    thisBook.addLine( 'h', bookName )
    thisBook.addLine( 'mt1', bookName )

    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, child.tag ) )
            continue
        where = "chapter in {}".format( BBB )
        BibleOrgSysGlobals.checkXMLNoText( child, where, 'j3jd' )
        BibleOrgSysGlobals.checkXMLNoTail( child, where, 'al1d' )
        self.__validateAndExtractChapter( BBB, thisBook, child )

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def __validateAndExtractBook( self, book ):
    """
    Convert one OpenSong XML book element into a BibleBook and save it.

    The book name is taken from the element's "n" attribute and mapped to a
    book code with self.genericBOS.getBBB(). When the name is missing or is
    not recognised, an error is logged and nothing is saved. Otherwise id, h
    and mt1 lines are appended, every chapter subelement is passed to
    __validateAndExtractChapter, and the finished book goes to
    self.saveBook().
    """
    if Globals.verbosityLevel > 3: print( _("Validating OpenSong XML book...") )

    # Only the "n" (name) attribute is understood; anything else is reported
    bookName = None
    for attrName,attrValue in book.items():
        if attrName != "n":
            logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrName, attrValue ) )
            continue
        bookName = attrValue

    if not bookName: # no bookName
        logging.error( _("OpenSong load can't find a book name") )
        return

    BBB = self.genericBOS.getBBB( bookName )
    if not BBB: # no BBB
        logging.error( _("OpenSong load doesn't recognize book name: '{}'").format( bookName ) )
        return

    if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = "OpenSong XML Bible Book object"
    thisBook.objectTypeString = "OpenSong"

    # Header lines: identification, running header and main title
    usfmAbbrev = Globals.BibleBooksCodes.getUSFMAbbreviation( BBB )
    idText = '{} imported by {}'.format( usfmAbbrev.upper(), ProgNameVersion )
    thisBook.appendLine( 'id', idText )
    thisBook.appendLine( 'h', bookName )
    thisBook.appendLine( 'mt1', bookName )

    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error( "Expected to find '{}' but got '{}'".format( OpenSongXMLBible.chapterTag, child.tag ) )
            continue
        where = "chapter in {}".format( BBB )
        Globals.checkXMLNoText( child, where, 'j3jd' )
        Globals.checkXMLNoTail( child, where, 'al1d' )
        self.__validateAndExtractChapter( BBB, thisBook, child )

    if Globals.verbosityLevel > 2: print( " Saving {} into results...".format( BBB ) )
    self.saveBook( thisBook )
def load( self ):
    """
    Load a single DrupalBible source file and stash each book found in it.

    The file at self.sourceFilepath is read line by line using
    self.encoding. Blank lines and lines starting with '#' are discarded.
    The first line is expected to be '*Bible' (a warning is logged if not).
    After that come the version details (until a '*Chapter' line), the book
    details (until a '*Context' line) and finally the '|'-separated verse
    records. The version's full name is stored in self.name. Each completed
    book is passed to self.stashBook(), and self.doPostLoadProcessing() is
    called at the end.

    Raises:
        ValueError: if a record does not have the expected number of
            '|'-separated fields (from the tuple unpacking).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

    # A UTF-8 BOM (bytes EF BB BF) shows up as these three characters when
    # the file is decoded with a single-byte encoding
    UTF8_BOM_AS_SINGLE_BYTE_CHARS = '\xef\xbb\xbf'

    status = 0 # 0 = getting version details, 1 = getting chapters, 2 = getting verse data
    lineCount = 0
    BBB = lastBBB = None
    thisBook = None # Stays None until the first verse record is seen
    lastChapterNumberString = None
    bookDetails = {}
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount==1:
                if line.startswith( chr(65279) ): #U+FEFF
                    logging.info( "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                    line = line[1:] # Remove the UTF-16 Unicode Byte Order Marker (BOM)
                elif line.startswith( UTF8_BOM_AS_SINGLE_BYTE_CHARS ): # 0xEF,0xBB,0xBF
                    logging.info( "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                    line = line[3:] # Remove the UTF-8 Unicode Byte Order Marker (BOM)
            if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines
            if line[0] == '#': continue # Just discard comment lines

            if lineCount == 1:
                if line != '*Bible':
                    logging.warning( "Unknown DrupalBible first line: {}".format( repr(line) ) )

            elif status == 0:
                if line == '*Chapter': status = 1
                else: # Get the version name details
                    bits = line.split( '|' )
                    shortName, fullName, language = bits # Unpacking also checks the field count
                    self.name = fullName

            elif status == 1:
                if line == '*Context': status = 2
                else: # Get the book name details
                    bits = line.split( '|' )
                    bookCode, bookFullName, bookShortName, numChapters = bits
                    assert bookShortName == bookCode
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                    BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                    bookDetails[BBB] = bookFullName, bookShortName, numChapters

            elif status == 2: # Get the verse text
                bits = line.split( '|' )
                bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                # NOTE(review): `halt` is not defined in this block -- presumably an
                #   intentionally undefined name used to abort on unhandled data; confirm
                if lineMark: print( repr(lineMark) ); halt
                BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                if BBB != lastBBB: # We've started a new book
                    if thisBook is not None:
                        self.stashBook( thisBook )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'DrupalBible Bible Book object'
                    thisBook.objectTypeString = 'DrupalBible'
                    lastChapterNumberString = None
                    lastBBB = BBB
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumberString = chapterNumberString
                # Angle brackets in the source become \it ... \it* markers
                verseText = verseText.replace( '<', '\\it ' ).replace( '>', '\\it*' )
                thisBook.addLine( 'v', verseNumberString + ' ' + verseText )

            else: halt # status has no other values -- see the NOTE(review) on `halt` above

    # Save the final book (there is none if the file held no verse records)
    if thisBook is not None:
        self.stashBook( thisBook )
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single verse-per-line (VPL) source file and save each book.

    The file at self.sourceFilepath is read using self.encoding. Blank lines
    and lines starting with '#' are discarded. Every other line is expected
    to look like "bookCode chapter:verse text". Each time the book code
    changes, the previous book is passed to self.saveBook() and a new
    BibleBook is started. self.doPostLoadProcessing() is called at the end.

    Lines that do not have the expected shape are reported and skipped.
    Lines belonging to a book whose code cannot be resolved are skipped
    after one critical log message for that book.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    lineCount = 0
    BBB = None
    thisBook = None  # The book being filled; None when there is none
    # Fields of the most recent well-formed line (used in diagnostics)
    bookCode = chapterNumberString = verseNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if (lineCount == 1 and self.encoding.lower() == 'utf-8'
                    and line.startswith(chr(65279))):  # U+FEFF
                # NOTE(review): the message says UTF-16 although this branch
                #   only runs for a utf-8 file -- wording left unchanged
                logging.info(
                    " VPLBible.load: Detected UTF-16 Byte Order Marker")
                line = line[1:]  # Remove the Byte Order Marker
            if line.endswith('\n'):
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == '#':
                continue  # Just discard comment lines

            bits = line.split(' ', 2)
            if len(bits) != 3 or ':' not in bits[1]:
                # Report the malformed line (with the previous line's fields
                # as context) and skip it instead of reprocessing old data
                print("Unexpected number of bits", self.givenName, BBB,
                      bookCode, chapterNumberString, verseNumberString,
                      len(bits), bits)
                continue
            bookCode, CVString, vText = bits
            chapterNumberString, verseNumberString = CVString.split(':')

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode, chapterNumberString,
                    verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert (2 <= len(bookCode) <= 4)
            if BibleOrgSysGlobals.debugFlag:
                assert (chapterNumberString.isdigit())
            if not verseNumberString.isdigit():
                logging.error(
                    "Invalid verse number field at {}/{} {}:{!r}".format(
                        bookCode, BBB, chapterNumberString,
                        verseNumberString))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    assert (verseNumberString.isdigit())
                continue
            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.saveBook(thisBook)
                    thisBook = None
                # Remember the code even when it can't be resolved, so the
                # problem is reported once per book and not once per line
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB(
                    bookCode)  # Try to guess
                if BBB:
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "VPL Bible Book object"
                    thisBook.objectTypeString = "VPL"
                else:
                    logging.critical(
                        "VPLBible could not figure out {!r} book code".
                        format(bookCode))
                    # NOTE(review): `halt` is not defined in this block --
                    #   presumably an intentionally undefined name used to
                    #   abort in debug mode; confirm
                    if BibleOrgSysGlobals.debugFlag: halt
            if thisBook is None:
                continue  # Verses of an unrecognised book have nowhere to go

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert (chapterNumber > lastChapterNumber or BBB == 'ESG'
                            )  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle special formatting
            #   [brackets] are for Italicized words
            #   <brackets> are for the Words of Christ in Red
            #   «brackets» are for the Titles in the Book of Psalms.
            vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
            if vText and vText[0] == '«':
                if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                    vBits = vText[1:].split('»')
                    thisBook.addLine('d', vBits[0])  # Psalm title
                    vText = vBits[1].lstrip()

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").
                    format(self.givenName, BBB, bookCode,
                           chapterNumberString, verseNumberString))
                continue
            # NOTE(review): every '<' was replaced above, so this startswith
            #   test looks like it can never succeed -- confirm the intent
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                    '<') and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                      ).format(lastVerseNumber, verseNumber, self.givenName,
                               BBB, bookCode, chapterNumberString,
                               verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").
                        format(verseNumber, self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                          ).format(verseNumber, self.givenName, BBB,
                                   bookCode, chapterNumberString,
                                   verseNumberString))
            # The verse line is added regardless; the warnings only report
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there is none if no usable verse line was read)
    if thisBook is not None:
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load( self ): """ Load the compressed data file and import book objects. """ if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) ) with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done fileBytes = myFile.read() if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( " {:,} bytes read".format( len(fileBytes) ) ) keep = OrderedDict() index = 0 # Block 1 is 32-bytes long and always the same for EW2009 Bibles #if debuggingThisModule: print( 'introBlock', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] ) keep['introBlock'] = (index,fileBytes[index:index+32]) hString = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break hString += chr( char8 ) #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index ) if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag: assert hString == 'EasyWorship Bible Text' introBlockb = fileBytes[index+j:index+32] #if BibleOrgSysGlobals.debugFlag: print( 'introBlockb', hexlify( introBlockb ), introBlockb ) assert introBlockb == b'\x1a\x02<\x00\x00\x00\xe0\x00\x00\x00' # b'1a023c000000e0000000' # Skipped some (important?) binary here??? 
but it's the same for every module index += 32 # Block 2 is 56-bytes long moduleNameBlock = fileBytes[index:index+56] keep['moduleNameBlock'] = (index,moduleNameBlock) #if debuggingThisModule: print( 'moduleNameBlock', hexlify( moduleNameBlock ), moduleNameBlock ) nString = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break nString += chr( char8 ) #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: print( 'nString', repr(nString), index ) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EasyWorshipBible.load: " + _("Setting module name to {!r}").format( self.name ) ) self.name = nString #assert self.name # Not there for amp and gkm moduleNameBlockb = fileBytes[index+j:index+56] #if BibleOrgSysGlobals.debugFlag: print( 'moduleNameBlockb', len(moduleNameBlockb), hexlify( moduleNameBlockb ), moduleNameBlockb ) #assert moduleNameBlockb.endswith( b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00' ) # b'000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000' for ix in range( index+j, index+56 ): # Mostly zeroes remaining if ix == 84: # What does this mean??? value = fileBytes[ix] assert value in (0,1,2,3,4,5) # bbe=0, alb=1, esv2=2, esv=3, asv=4 nasb=5 Revision number??? 
keep['byte84'] = (index,value) else: assert fileBytes[ix] == 0 index += 56 # Get the optional booknames and the raw data for each book into a list rawBooks = [] for bookNumber in range( 1, 66+1 ): bookInfoBlock = fileBytes[index:index+51] blockName = 'bookInfoBlock-{}'.format( bookNumber ) keep[blockName] = (index,bookInfoBlock) #if debuggingThisModule: print( blockName, hexlify( bookInfoBlock ), bookInfoBlock ) bookName = '' for j in range( 0, 32 ): char8 = fileBytes[index+j] #print( char8, repr(char8) ) if char8 < 0x20: break # bookName seems quite optional -- maybe the English ones are assumed if empty??? bookName += chr( char8 ) assert fileBytes[index+j:index+51] == b'\x00' * (51-j) # Skipped some zeroes here index += 51 if bookName and bookName[-1] == '.': bookName = bookName[:-1] # Remove final period #if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2: #print( 'bookName', repr(bookName) ) numChapters = fileBytes[index] numVerses = [] for j in range( 0, numChapters ): numVerses.append( fileBytes[index+j+1] ) #print( "here1", 157-j-2, hexlify(fileBytes[index+j+2:index+157]), fileBytes[index+j+2:index+157] ) if self.abbreviation != 'fn1938': # Why does this fail??? 
assert fileBytes[index+j+2:index+157] == b'\x00' * (157-j-2) # Skipped some zeroes here index += 157 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' {!r} numChapters={} verses={}'.format( bookName, numChapters, numVerses ) ) bookStart, = struct.unpack( "<I", fileBytes[index:index+4] ) assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here index += 8 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' bookStart is at {:,}'.format( bookStart ) ) bookLength, = struct.unpack( "<I", fileBytes[index:index+4] ) assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here index += 8 #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: #print( ' {} bookLength is {:,} which goes to {:,}'.format( bookNumber, bookLength, bookStart+bookLength ) ) bookBytes = fileBytes[bookStart:bookStart+bookLength] # Looking ahead into the file rawBooks.append( (bookName, numChapters, numVerses, bookStart, bookLength, bookBytes) ) if bookLength == 0: # e.g., gkm Philippians (book number 50) logging.critical( "Booknumber {} is empty in {}".format( bookNumber, self.abbreviation ) ) else: #if debuggingThisModule: #print( "cHeader1 for {}: {}={} {}={}".format( self.abbreviation, bookBytes[0], hexlify(bookBytes[0:1]), bookBytes[1], hexlify(bookBytes[1:2]) ) ) assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header (for compression levels 7-9) assert index == 14872 # 32 + 56 + 224*66 workNameBlock = fileBytes[index:index+30] # 30 here is just a maximum, not fixed keep['workNameBlock'] = (index,workNameBlock) # This block starts with a length, then a work name, e.g., ezFreeASV #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: #print( 'workNameBlock', index, hexlify(workNameBlock), workNameBlock ) length3, = struct.unpack( "<I", fileBytes[index:index+4] ) #print( "length3", length3 ) # Seems to include the compressed string plus six more bytes keep['length3'] = (index,length3) if length3: bookInfoBlock 
= fileBytes[index+4:index+4+length3-4-6] if debuggingThisModule: print( "cHeader2 for {}: {}={} {}={}".format( self.abbreviation, bookInfoBlock[0], hexlify(bookInfoBlock[0:1]), bookInfoBlock[1], hexlify(bookInfoBlock[1:2]) ) ) assert bookInfoBlock[0]==0x78 and bookInfoBlock[1]==0xda # Zlib compression header (for compression levels 7-9) byteResult = zlib.decompress( bookInfoBlock ) #rewriteResult1 = zlib.compress( byteResult, 9 ) #byteResult1 = zlib.decompress( rewriteResult1 ) #compressor = zlib.compressobj(level=9, method=zlib.DEFLATED, wbits=15, memLevel=8, strategy=zlib.Z_DEFAULT_STRATEGY ) #rewriteResult2 = compressor.compress( byteResult ) #rewriteResult2 += compressor.flush() #byteResult2 = zlib.decompress( rewriteResult2 ) #print( "rewrite1 {} {} {}\n {} {} {}\n {} {} {}\n to {} {}\n to {} {}\n to {} {}" \ #.format( len(bookInfoBlock), hexlify(bookInfoBlock), bookInfoBlock, #len(rewriteResult1), hexlify(rewriteResult1), rewriteResult1, #len(rewriteResult2), hexlify(rewriteResult2), rewriteResult2, #len(byteResult), byteResult, #len(byteResult1), byteResult1, #len(byteResult2), byteResult2 ) ) textResult = byteResult.decode( 'utf8' ) if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( "Block4: Got {} chars {!r} from {} bytes".format( len(textResult), textResult, length3 ) ) assert textResult.startswith('ezFree') or textResult.startswith('ezPaid') keep['workName'] = (index+4,textResult) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EasyWorshipBible.load: " + _("Setting module work name to {!r}").format( textResult ) ) if self.name: self.workName = textResult else: # Should rarely happen self.name = self.workName = textResult workNameAppendage = fileBytes[index+4+length3-6-4:index+4+length3-4] #print( "workNameAppendage", len(workNameAppendage), hexlify(workNameAppendage), workNameAppendage ) keep['workNameAppendage'] = (index+4+length3-6-4,workNameAppendage) assert workNameAppendage[:4] == b'QK\x03\x04' uncompressedNameLength, = struct.unpack( 
"<B", workNameAppendage[4:5] ) assert workNameAppendage[5:] == b'\x00' assert len(textResult) == uncompressedNameLength keep['length3'] = (index,length3) index += length3 #print( self.abbreviation, len(textResult), repr(textResult), 'length3', length3, len(textResult)+18 ) assert length3 == len(textResult) + 18 bookDataStartIndex = rawBooks[0][3] #print( "bookDataStartIndex", bookDataStartIndex ) #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: #print( 'After known contents @ {:,}'.format( index ), hexlify( fileBytes[index:index+60] ), fileBytes[index:index+60] ) block0080 = fileBytes[index:bookDataStartIndex] #print( "block0080", index, len(block0080), hexlify(block0080), block0080 ) keep['block0080'] = (index,block0080) assert block0080 == b'\x00\x00\x08\x00' # b'00000800' index += len( block0080 ) keep['bookDataStartIndex'] = (index,bookDataStartIndex) assert index == bookDataStartIndex # Should now be at the start of the first book (already fetched above) # Look at extra stuff right at the end of the file assert len(rawBooks) == 66 index = bookStart + bookLength # of the last book endBytes = fileBytes[index:] #if BibleOrgSysGlobals.debugFlag and debuggingThisModule: #print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes ) assert len(endBytes) == 16 keep['endBytes'] = (index,endBytes) assert endBytes == b'\x18:\x00\x00\x00\x00\x00\x00ezwBible' # b'183a000000000000657a774269626c65' del fileBytes # Not needed any more # Now we have to decode the book text (compressed about 4x with zlib) if BibleOrgSysGlobals.verbosityLevel > 1: print( "EWB loading books for {}…".format( self.abbreviation ) ) for j, BBB in enumerate( BOS.getBookList() ): bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j] if bookLength == 0: assert not bookBytes logging.critical( " Skipped empty {}".format( BBB ) ) continue if BibleOrgSysGlobals.verbosityLevel > 2: print( ' Decoding {}…'.format( BBB ) ) bookBytes, bookExtra = bookBytes[:-10], 
bookBytes[-10:] assert len(bookExtra) == 10 keep['bookExtra-{}'.format(j+1)] = (-10,bookExtra) assert bookExtra[:4] == b'QK\x03\x04' uncompressedBookLength, = struct.unpack( "<I", bookExtra[4:8] ) assert bookExtra[8:] == b'\x08\x00' byteResult = zlib.decompress( bookBytes ) assert len(byteResult) == uncompressedBookLength try: textResult = byteResult.decode( 'utf8' ) except UnicodeDecodeError: logging.critical( "Unable to decode {} {} bookText -- maybe it's not utf-8???".format( self.abbreviation, BBB ) ) continue if debuggingThisModule: rewriteResult1 = zlib.compress( byteResult, 9 ) byteResult1 = zlib.decompress( rewriteResult1 ) if rewriteResult1 != bookBytes: print( "\nbookBytes", len(bookBytes), hexlify(bookBytes) ) print( "\nrewriteResult1", len(rewriteResult1), hexlify(rewriteResult1) ) halt if byteResult1 != byteResult: print( len(byteResult), hexlify(byteResult) ) print( len(byteResult1), hexlify(byteResult1) ) halt if '\t' in textResult: logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) ) textResult = textResult.replace( '\t', ' ' ) #print( textResult ) if BibleOrgSysGlobals.strictCheckingFlag: assert ' ' not in textResult thisBook = BibleBook( self, BBB ) thisBook.objectNameString = 'EasyWorship Bible Book object' thisBook.objectTypeString = 'EasyWorship Bible' if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev ) C, V = '-1', '-1' # So first/id line starts at -1:0 for line in textResult.split( '\r\n' ): if not line: continue # skip blank lines #if BibleOrgSysGlobals.debugFlag and debuggingThisModule: #print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) ) assert line[0].isdigit() assert ':' in line[:4] CV,verseText = line.split( ' ', 1 ) newC,newV = CV.split( ':' ) #print( newC, V, repr(verseText) ) if newC != C: if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order chapters in {}!".format( BBB ) ) 
else: assert int(newC) > int(C) C, V = newC, '0' thisBook.addLine( 'c', C ) if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats if int(newV) < int(V): break elif self.abbreviation=='drv' and BBB in ('GEN','EXO','NUM',): # Handle a bug -- Gen 18:1&12, Exo 28:42&43 out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) ) else: try: assert int(newV) > int(V) except ValueError: logging.critical( "Something's not an integer around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) ) except AssertionError: logging.critical( "Something's out of order around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) ) V = newV thisBook.addLine( 'v', V + ' ' + verseText ) if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB ) self.stashBook( thisBook ) self.doPostLoadProcessing() return keep
def load(self):
    """
    Load a single SwordSearcher Forge source file and stash its books.

    The file is processed line by line:
      * Lines starting with ';' are header/comment lines.  The recognised
        ones (TITLE, ABBREVIATION, HAS ITALICS, HAS FOOTNOTES,
        HAS REDLETTER) are saved into a settings dictionary.
      * '$$ {name}' starts a metadata block: the text lines that follow are
        collected (newline separated) under that name.
      * '$$ <book> <chapter>:<verse>' gives the reference for the verse text
        on the following line.
      * In verse text, scripref elements become x (cross-reference) fields,
        {...} becomes an f (footnote) field, [...] becomes an add field and
        +r/...-r/ becomes a wj field.

    Each completed book is passed to self.stashBook().  Any collected
    settings are saved as self.suppliedMetadata['Forge4SS'] and applied, and
    self.doPostLoadProcessing() is called at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    # The three organisational systems are module globals created on first use
    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganizationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Book codes that are given a BBB directly instead of being guessed
    fixedBookCodes = {'Ge': 'GEN', 'Le': 'LEV', 'La': 'LAM'}
    # Numbered book names lose their internal space so that the pointer can
    #   be split into book code and chapter:verse at the first space
    numberedBookFixes = (
        ('1 K', '1K'), ('2 K', '2K'),
        ('1 Chr', '1Chr'), ('2 Chr', '2Chr'),
        ('1 Cor', '1Cor'), ('2 Cor', '2Cor'),
        ('1 Thess', '1Thess'), ('2 Thess', '2Thess'),
        ('1 Tim', '1Tim'), ('2 Tim', '2Tim'),
        ('1 Pet', '1Pet'), ('2 Pet', '2Pet'),
        ('1 J', '1J'), ('2 J', '2J'), ('3 J', '3J'),
    )

    lineCount = 0
    bookCode = BBB = metadataName = None
    metadataContents = ''
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == chr(65279):  # U+FEFF or \ufeff
                    logging.info(
                        " ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)")
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search('^; TITLE:\\s', line)
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print("First line got type {!r} match from {!r}".format(
                            match.group(0), line))
                else:
                    # An unexpected first line is reported (using names that
                    #   actually exist here) and then skipped
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print("ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                              .format(line, self.sourceFilepath))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt  # NOTE(review): 'halt' is not defined in this block -- presumably a deliberate debug-mode crash; confirm
                    continue

            # Process header stuff
            if line.startswith('; TITLE:'):
                string = line[8:].strip()
                if string:
                    settingsDict['TITLE'] = string
                continue
            if line.startswith('; ABBREVIATION:'):
                string = line[15:].strip()
                if string:
                    settingsDict['ABBREVIATION'] = string
                continue
            # NOTE(review): the three colon-less flag prefixes below are 13 or
            #   15 characters long but are sliced at 14, so for FOOTNOTES and
            #   REDLETTER the final letter of the keyword ends up in the stored
            #   value.  Left unchanged in case the non-empty value is what
            #   marks the flag as present -- confirm the intent.
            if line.startswith('; HAS ITALICS'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_ITALICS'] = string
                continue
            if line.startswith('; HAS FOOTNOTES:'):
                string = line[16:].strip()  # The prefix is 16 characters including the colon
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            if line.startswith('; HAS FOOTNOTES'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            if line.startswith('; HAS REDLETTER'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_REDLETTER'] = string
                continue
            if line[0] == ';':
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                    .format(line))
                continue  # Just discard comment lines

            # Process the main segment
            if line.startswith('$$ '):
                # Any '$$' line ends the metadata block (if any) in progress,
                #   so that later verse text can't be swallowed by it
                if metadataName and metadataContents:
                    settingsDict[metadataName] = metadataContents
                metadataName = None

                pointer = line[3:]
                if pointer and pointer[0] == '{' and pointer[-1] == '}':
                    metadataName = pointer[1:-1]
                    if metadataName:
                        metadataContents = ''
                    continue

                # Otherwise let's assume it's a BCV reference
                for spacedName, joinedName in numberedBookFixes:
                    pointer = pointer.replace(spacedName, joinedName)
                B_CV_Bits = pointer.split(' ', 1)
                if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                    bookCode, CVString = B_CV_Bits
                    chapterNumberString, verseNumberString = CVString.split(':')
                    chapterNumber = int(chapterNumberString)
                    verseNumber = int(verseNumberString)
                    if bookCode != lastBookCode:  # We've started a new book
                        BBB = fixedBookCodes.get(bookCode)
                        if not BBB:
                            BBB = BOS66.getBBBFromText(bookCode)  # Try to guess
                        if not BBB:
                            BBB = BOS81.getBBBFromText(bookCode)  # Try to guess
                        if not BBB:
                            BBB = BOSx.getBBBFromText(bookCode)  # Try to guess
                else:
                    logging.error(
                        "Unexpected number of bits in {} after {}: {} {!r}"
                        .format(self.givenName, BBB, len(B_CV_Bits), B_CV_Bits))
                    # Forget the reference so that the text on the next line
                    #   isn't attached to the previous verse
                    bookCode = None
                continue  # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName:
                metadataContents += ('\n' if metadataContents else '') + line
                continue

            if not bookCode:  # No (valid) reference yet
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                    .format(line))
                continue

            vText = line
            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = vText.replace('(<scripref>', '\\x - \\xt ').replace('</scripref>)', '\\x*')
            vText = vText.replace('<scripref>', '\\x - \\xt ').replace('</scripref>', '\\x*')

            # Convert {stuff} to footnotes
            match = re.search('\\{(.+?)\\}', vText)
            while match:
                footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                    chapterNumber, verseNumber, match.group(1))
                vText = vText[:match.start()] + footnoteText + vText[match.end():]  # Replace this footnote
                match = re.search('\\{(.+?)\\}', vText)

            # Convert [stuff] to added fields
            match = re.search('\\[(.+?)\\]', vText)
            while match:
                addText = '\\add {}\\add*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[match.end():]  # Replace this chunk
                match = re.search('\\[(.+?)\\]', vText)

            # Convert +r/This text is red-letter-r/ to wj fields
            match = re.search('\\+r/(.+?)-r/', vText)
            while match:
                addText = '\\wj {}\\wj*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[match.end():]  # Replace this chunk
                match = re.search('\\+r/(.+?)-r/', vText)

            # Final check for unexpected remaining formatting
            if any(badChar in vText for badChar in '{}[]/'):
                logging.warning(
                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                    .format(BBB, chapterNumberString, verseNumberString, vText))

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                    thisBook = None  # So that it can never be stashed twice
                if BBB:
                    if BBB in self:
                        logging.critical("Have duplicated {} book in {}".format(
                            BBB, self.givenName))
                    if BibleOrgSysGlobals.debugFlag:
                        assert BBB not in self
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    numChapters = len(BOSx.getNumVersesList(BBB))
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical(
                        "ForgeForSwordSearcherBible could not figure out {!r} book code"
                        .format(bookCode))
                    if BibleOrgSysGlobals.debugFlag:
                        halt  # NOTE(review): undefined name -- presumably a deliberate debug-mode crash; confirm

            if not BBB:
                continue  # Can't save verses for a book that we couldn't identify

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                elif chapterNumber > numChapters:
                    logging.error(
                        "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                        .format(self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString, numChapters))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            # The verse is still added below in both of the following cases
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))

            # Check for paragraph markers
            if vText and vText[0] == '¶':
                thisBook.addLine('p', '')
                vText = vText[1:].lstrip()

            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Don't lose a metadata block that ran right to the end of the file
    if metadataName and metadataContents:
        settingsDict[metadataName] = metadataContents

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata('Forge4SS')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load( self ):
    """
    Load the compressed data file and import book elements.

    The whole file is read into memory and then walked with a byte index:
      * a 32-byte block which must start with 'EasyWorship Bible Text'
      * a 56-byte block starting with the module name
      * 66 book records (abbreviation, chapter count and verses per chapter,
          then the start offset and length of the book's compressed data)
      * a length-prefixed, zlib-compressed block which (if present)
          replaces the module name
      * the bytes up to the start of the first book
      * 16 bytes after the end of the last book.
    Each non-empty book is then decompressed, decoded as UTF-8 and split
    into 'C:V text' lines which are added to a new BibleBook and stashed.

    Empty books and books that can't be decoded are logged and skipped.
    Verse numbers that aren't integers or that don't increase are logged
    (always, not only in debug mode) and the verse is still added.

    Returns a dictionary of the raw blocks that were read along the way.
    """
    import zlib

    def readText( buffer, start ):
        """
        Collect up to 32 bytes from buffer (beginning at start) as characters,
            stopping at the first byte below 0x20.

        Returns (text, offset) where offset is the last offset looked at.
        """
        text = ''
        for offset in range( 32 ):
            char8 = buffer[start+offset]
            if char8 < 0x20: break
            text += chr( char8 )
        return text, offset
    # end of readText

    if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
    with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done
        fileBytes = myFile.read()
    if BibleOrgSysGlobals.debugFlag: print( " {:,} bytes read".format( len(fileBytes) ) )

    keep = {}
    index = 0

    # Block 1: the file signature
    keep['block1'] = fileBytes[index:index+32]
    hString, stopOffset = readText( fileBytes, index )
    if BibleOrgSysGlobals.debugFlag: print( 'block1b', hexlify( fileBytes[index+stopOffset:index+32] ) )
    # Skipped some (important?) binary here
    index += 32
    if BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
    assert hString == 'EasyWorship Bible Text'

    # Block 2: the module name
    keep['block2'] = fileBytes[index:index+56]
    nString, _stopOffset = readText( fileBytes, index )
    # Skipped some zeroes here
    index += 56
    if BibleOrgSysGlobals.debugFlag: print( 'nString', repr(nString), index )
    self.name = nString

    # The table of 66 book records
    rawBooks = []
    for _bookNumber in range( 1, 66+1 ):
        bookAbbrev, _stopOffset = readText( fileBytes, index )
        # Skipped some zeroes here
        index += 51
        if bookAbbrev and bookAbbrev[-1] == '.': bookAbbrev = bookAbbrev[:-1] # Remove final period
        if BibleOrgSysGlobals.verbosityLevel > 2: print( 'bookAbbrev', repr(bookAbbrev) )
        numChapters = fileBytes[index]
        numVerses = list( fileBytes[index+1:index+1+numChapters] ) # One byte per chapter
        # Skipped some zeroes here
        index += 157
        if BibleOrgSysGlobals.debugFlag: print( ' ', numChapters, numVerses )
        bookStart, = struct.unpack( "<I", fileBytes[index:index+4] )
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag: print( ' bookStart', bookStart )
        bookLength, = struct.unpack( "<I", fileBytes[index:index+4] )
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag: print( ' bookLength', bookLength, bookStart+bookLength )
        bookBytes = fileBytes[bookStart:bookStart+bookLength]
        if bookLength: # An empty book has no header to check (it's skipped when decoding below)
            assert bookBytes[:2] == b'\x78\xda' # Zlib compression header
        rawBooks.append( (bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes) )
    if BibleOrgSysGlobals.debugFlag: print( 'unknown block3', index, hexlify( fileBytes[index:index+30] ) )
    keep['block3'] = fileBytes[index:index+30]

    # Block 3: length-prefixed compressed text (the length includes the 4-byte length field itself)
    length3, = struct.unpack( "<I", fileBytes[index:index+4] )
    if length3:
        block3 = fileBytes[index+4:index+4+length3-4]
        byteResult = zlib.decompress( block3 )
        textResult = byteResult.decode( 'utf8' )
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( "Got", len(textResult), textResult, 'from', length3 )
        keep['block3n'] = textResult
        if self.name: print( 'Overwriting module name {!r} with {!r}'.format( self.name, textResult ) )
        self.name = textResult
    index += length3
    if BibleOrgSysGlobals.debugFlag: print( 'end of contents', index, hexlify( fileBytes[index:index+60] ) )

    # Whatever lies between here and the start of the first book
    firstBookStart = rawBooks[0][3]
    keep['block4'] = firstBookStart
    block5 = fileBytes[index:firstBookStart]
    keep['block5'] = block5
    index += len( block5 )
    assert index == firstBookStart # Should now be at the start of the first book (already fetched above)
    assert len(rawBooks) == 66

    # Look at extra stuff at end (bookStart and bookLength still belong to the last book record)
    endBytes = fileBytes[bookStart+bookLength:]
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
    assert len(endBytes) == 16
    keep['block9'] = endBytes
    # Skipped some binary and some text here
    del fileBytes # Not needed any more

    # Now we have to decode the book text (compressed about 4x with zlib)
    for j, BBB in enumerate( BOS.getBookList() ):
        if BibleOrgSysGlobals.verbosityLevel > 2: print( ' Decoding {}…'.format( BBB ) )
        bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
        if bookLength == 0:
            logging.critical( " Skipped empty {}".format( BBB ) )
            continue
        byteResult = zlib.decompress( bookBytes )
        try: textResult = byteResult.decode( 'utf8' )
        except UnicodeDecodeError:
            logging.critical( "Unable to decode {} {} bookText -- maybe it's not utf-8???".format( self.abbreviation, BBB ) )
            continue
        if '\t' in textResult:
            logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
            textResult = textResult.replace( '\t', ' ' )
        # Every verse line contains a space (it's split at one below), so the
        #   strict check has to be for doubled spaces
        if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'EasyWorship Bible Book object'
        thisBook.objectTypeString = 'EasyWorship Bible'
        if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

        C = V = '0'
        for line in textResult.split( '\r\n' ):
            if not line: continue # skip blank lines
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
            assert line[0].isdigit()
            assert ':' in line[:4]
            CV,verseText = line.split( ' ', 1 )
            newC,newV = CV.split( ':' )
            if newC != C:
                if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order
                    print( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                else: assert int(newC) > int(C)
                C, V = newC, '0'
                thisBook.addLine( 'c', C )

            # Known problems in particular modules are reported without checking
            if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                if int(newV) < int(V): break
            elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order
                print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
            elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order
                print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
            elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order
                print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
            elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order
                print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
            else: # Report (but don't fail on) bad verse numbers, whether or not asserts are enabled
                try:
                    if int(newV) <= int(V):
                        logging.critical( "Something's out of order around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
                except ValueError:
                    logging.critical( "Something's not an integer around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
            V = newV
            thisBook.addLine( 'v', V + ' ' + verseText )

        if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
        self.stashBook( thisBook )

    self.doPostLoadProcessing()
    return keep
def load(self):
    """
    Load a single source file and load book elements.

    Lines starting with '#' are metadata: a 'name<TAB>value' pair with a
    non-empty value is saved into self.suppliedMetadata['Unbound'].

    Every other non-blank line is split at tabs and must have
      * 4 fields: book code, chapter, verse, text
      * 6 fields: book code, chapter, verse, subverse, sequence, text
      * 9 fields: three NRSVA reference fields followed by the six above.
    A new BibleBook is started whenever the book code changes and each
    finished book is passed to self.stashBook().  Verses with no text (or
    with only '+') are skipped, as are exact repeats of the previous verse.

    A file containing no verses no longer fails at the end: there is simply
    no book to stash.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    self.suppliedMetadata['Unbound'] = {}

    BBB = None
    thisBook = None
    # These are reported in diagnostic messages, so they must exist even
    #   if the very first data line is a bad one
    bookCode = chapterNumberString = verseNumberString = None
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ''
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            if line and line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if line[0] == '#':
                hashBits = line[1:].split('\t')
                if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                    self.suppliedMetadata['Unbound'][hashBits[0]] = hashBits[1]
                continue  # Just discard comment lines

            bits = line.split('\t')
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                (bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 9:
                (NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString,
                 bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 1 and self.givenName.startswith('lxx_a_parsing_'):
                # The reference shown is that of the last good line
                logging.warning(
                    _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                        line, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            else:
                print("Unexpected number of bits", self.givenName, BBB, bookCode,
                      chapterNumberString, verseNumberString, len(bits), bits)
                halt  # NOTE(review): 'halt' is not defined in this block -- presumably a deliberate crash; confirm

            if NRSVA_bookCode:
                assert len(NRSVA_bookCode) == 3
            if NRSVA_chapterNumberString:
                assert NRSVA_chapterNumberString.isdigit()
            if NRSVA_verseNumberString:
                assert NRSVA_verseNumberString.isdigit()

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode,
                    chapterNumberString, verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert len(bookCode) == 3
                assert chapterNumberString.isdigit()
                assert verseNumberString.isdigit()

            if subverseNumberString:
                logging.warning(
                    _("subverseNumberString {!r} in {} {} {}:{}").format(
                        subverseNumberString, BBB, bookCode,
                        chapterNumberString, verseNumberString))

            vText = vText.strip()  # Remove leading and trailing spaces
            if not vText:
                continue  # Just ignore blank verses I think
            if vText == '+':
                continue  # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)
            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumberString.isdigit()
                sequenceNumber = int(sequenceNumberString)
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumber > lastSequence or \
                        self.givenName in ('gothic_latin', 'hebrew_bhs_consonants',
                                           'hebrew_bhs_vowels', 'latvian_nt',
                                           'ukrainian_1871',)  # Why???
                lastSequence = sequenceNumber

            if bookCode != lastBookCode:  # We've started a new book
                if thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'Unbound Bible Book object'
                thisBook.objectTypeString = 'Unbound'
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') \
                    and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero
                # NOTE(review): only the number used for the ordering checks
                #   changes -- the line added below still uses
                #   verseNumberString ('1'); confirm that this is intended
                verseNumber = 0
            # The verse is still added below in both of the following cases
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there isn't one if the file contained no verses)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.applySuppliedMetadata('Unbound')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    A USFX Bible is a single XML file (root element 'usfx') that holds
        'book' elements. Each book is converted into USFM-style lines
        (marker + text) which are stored in a BibleBook object.
    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.

        sourceFolder is the folder that is scanned for a USFX XML file.
        givenName is an optional name for the Bible; if it is not given,
            a name is derived from the folder name.
        encoding is remembered and used when reading candidate files as text.

        After construction, self.sourceFilepath is the path of the USFX file
            if exactly one was recognised, otherwise None.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder '{}' is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        self.foundFiles = [] # Remembered so that load() can fall back to scanning these files
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperExt = os.path.splitext( somethingUpper )[1]
                if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
                    continue
                if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        self.foundFiles = foundFiles
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        # Peek into each candidate file to see which one(s) look like USFX XML
        numFound = 0
        for thisFilename in sorted( foundFiles ):
            firstLines = Globals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if Globals.verbosityLevel > 2: print( "USFXB (unexpected) first line was '{}' in {}".format( firstLines, thisFilename ) )
                continue
            # The root element may share the first line with the XML declaration or follow on a later line
            if not any( "<usfx " in peekedLine for peekedLine in firstLines ):
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if Globals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif Globals.verbosityLevel > 2: # We had candidate files, but none of them was USFX
            print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.

        Parses self.sourceFilepath, processes the attributes and subelements
            of the 'usfx' root element (calling loadBook for each 'book'),
            and finally calls doPostLoadProcessing().
        Logs a critical error and returns early (without post-processing)
            if there is no file to load or if the XML cannot be parsed.
        """
        if Globals.verbosityLevel > 1: print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if self.sourceFilepath is None: # The constructor didn't find exactly one USFX file
            logging.critical( "USFXXMLBible.load: no USFX file to load from '{}'.".format( self.sourceFolder ) )
            return
        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError as parseError:
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: '{}'.".format( self.sourceFilepath, parseError ) )
            return
        if Globals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (usfx) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            Globals.checkXMLNoText( self.tree, location, '4f6h' )
            Globals.checkXMLNoTail( self.tree, location, '1wk8' )

            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else: logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None # Only used for the warning message below
            for element in self.tree:
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    Globals.checkXMLNoTail( element, sublocation, 'cff3' )
                    Globals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
        else:
            logging.error( "USFXXMLBible.load: expected a 'usfx' root element but got '{}'".format( self.tree.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if Globals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in '{}'".format( self.sourceFolder ) )
            for thisFilename in self.foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                try:
                    with open( thisPath, encoding=self.encoding ) as possibleUSXFile: # Automatically closes the file when done
                        for line in possibleUSXFile:
                            if line.startswith( '\\id ' ):
                                USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                                if Globals.verbosityLevel > 2: print( "Have possible USFX ID '{}'".format( USXId ) )
                                BBB = Globals.BibleBooksCodes.getBBBFromUSFM( USXId )
                                if Globals.verbosityLevel > 2: print( "BBB is '{}'".format( BBB ) )
                                isUSFX = True
                            break # We only look at the first line
                except (OSError, UnicodeDecodeError) as fileError: # e.g., a binary file in the folder
                    logging.warning( "USFXXMLBible.load: couldn't read '{}': {}".format( thisPath, fileError ) )
                    continue
                if isUSFX:
                    # NOTE(review): USFXXMLBibleBook is not defined in this class -- confirm that the module provides it
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.

        bookElement must be a 'book' element. Its 'id' attribute is used as
            the USFM book code. Each child element is converted into one or
            more lines of a new BibleBook (kept in self.thisBook while
            loading), which is passed to saveBook() at the end.

        NOTE: `halt` (used here and in the other load methods) is not defined
            in this class. Presumably it is a deliberate fail-fast marker that
            stops the run (with a NameError if the name is undefined) when
            unhandled input is met -- confirm against the rest of the module.
        """
        if Globals.verbosityLevel > 3: print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2: print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        # Check the book element itself (the root element is already checked in load)
        Globals.checkXMLNoText( bookElement, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( bookElement, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0' # Current chapter and verse (as strings) -- used in location strings for messages
        for element in bookElement:
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset':
                        charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # Join only the parts that we actually have (charset might be missing)
                self.thisBook.appendLine( 'ide', ' '.join( part for part in (charset, ideText) if part ) )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if level is None: # Don't crash on the concatenation below
                    logging.warning( "USFXXMLBible.loadBook: missing level attribute (assuming 1) in {}".format( location ) )
                    level = '1'
                self.thisBook.appendLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element:
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                paragraphV = self.loadParagraph( element, location, C )
                if paragraphV is not None: V = paragraphV # Keep our last verse number if the paragraph had no verses
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, C ):
        """
        Load the paragraph (p or q) container from the XML data file.

        Converts the subelements of the paragraph (verse milestones, notes,
            figures, tables, character formatting, etc.) into lines of
            self.thisBook.
        C is the current chapter number string (only used in messages).

        Returns the id of the last verse milestone found in the paragraph,
            or None if the paragraph contained no verse milestones.

        NOTE(review): the paragraph's own marker (tag and the sfm/level/style
            attributes) and any text before its first subelement are not
            written to the book by this method -- confirm whether that
            is intended.
        """
        V = None
        Globals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first (the known ones are accepted but not currently used)
        for attrib,value in paragraphElement.items():
            if attrib not in ('sfm','level','style',):
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                Globals.checkXMLNoText( element, location, 'crc2' )
                Globals.checkXMLNoSubelements( element, location, 'lct3' )
                V = None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V ) # Must have a non-empty verse id
                self.thisBook.appendLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                Globals.checkXMLNoText( element, location, 'lsc3' )
                Globals.checkXMLNoTail( element, location, 'mfy4' )
                Globals.checkXMLNoAttributes( element, location, 'bd24' )
                Globals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                self.loadFootnote( element, location )
            elif element.tag == 'x':
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'kdf0' )
                Globals.checkXMLNoAttributes( element, location, 'lkj1' )
                Globals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if Globals.debugFlag: halt
            elif element.tag == 'milestone':
                print( "What is loadParagraph milestone?" )
                if Globals.debugFlag: halt
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location ):
        """
        Append a character-formatting element to the last line of the book.

        Writes the opening marker (named after the element tag) and the
            element text, then any footnote subelements, then the closing
            marker followed by the element tail (if any).
        Any other kind of subelement is logged and then halts the load.
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        Globals.checkXMLNoAttributes( element, location, 'sd12' )
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text or '' ) ) # Don't write 'None' if there's no text
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'f':
                self.loadFootnote( subelement, sublocation )
            else:
                # location already includes the chapter and verse (which aren't available in this method)
                logging.warning( _("sf31 Unprocessed {} element after {} in {}").format( repr(subelement.tag), self.thisBook.BBB, location ) )
                halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        Append a figure element to the last line of the book.

        The text of the figure's subelements is collected and written
            as a \\fig ... \\fig* entry, with the seven fields separated
            by vertical bars, followed by the element tail (if any).
        """
        Globals.checkXMLNoText( element, location, 'ff36' )
        Globals.checkXMLNoAttributes( element, location, 'cf35' )
        figFieldNames = ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',) # In output order
        figDict = { fieldName:'' for fieldName in figFieldNames }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            Globals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            Globals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            Globals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = '|'.join( figDict[fieldName] for fieldName in figFieldNames )
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        Add a table element to the book.

        Each 'tr' subelement starts a new 'tr' line, and each of its cells
            (th, thr, tc, tcr, with an optional level attribute) is appended
            to that line as a marker plus the cell text.
        Any other kind of subelement is logged and skipped.
        """
        Globals.checkXMLNoText( element, location, 'kg92' )
        Globals.checkXMLNoTail( element, location, 'ka92' )
        Globals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                self.thisBook.appendLine( 'tr', '' )
                Globals.checkXMLNoText( subelement, sublocation, 'sg32' )
                Globals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                Globals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    Globals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    Globals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                    marker = tag + (level if level else '')
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text or '' ) ) # Don't write 'None' for an empty cell
            else:
                # location already includes the chapter and verse (which aren't available in this method)
                logging.warning( _("kv64 Unprocessed {} element after {} in {}").format( subelement.tag, self.thisBook.BBB, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location ):
        """
        Append a footnote element to the last line of the book.

        Writes \\f plus the caller attribute and any footnote text, then
            each subelement (a 'ref' with its tgt attribute, or a footnote
            field such as fr or ft which may itself contain 'ref' elements),
            then the closing \\f* followed by the element tail (if any).
        Unexpected structure halts the load.
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if Globals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',) )
            if marker=='ref':
                assert( fText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText or '' ) ) # Don't write 'None' if there's no text
                if marker[0] == 'f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2 = sub2element.tag, clean(sub2element.text)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2=='ref':
                            print( sub2location )
                            assert( not fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Append a cross-reference element to the last line of the book.

        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

        Writes \\x plus the caller attribute, then each subelement (a 'ref'
            with its tgt attribute, or a cross-reference field such as xo or
            xt which may itself contain 'ref' elements), then the closing
            \\x* followed by the element tail (if any).
        Unexpected structure halts the load.
        """
        tail = clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if Globals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText or '' ) ) # Don't write 'None' if there's no text
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2 = sub2element.tag, clean(sub2element.text)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            assert( not xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCrossreference
class GreekNT( Bible ):
    """
    Bible subclass for a Greek NT object (which may contain one or more Bible books).

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ):
        """
        Constructor: remembers the given parameters and checks the source path.

        If sourceFilepath is a folder, the names of the readable files in it
            that end with '.txt' (any case) are collected in self.possibleFilenames.
        If it is not a folder and is not readable, a critical error is logged
            and the constructor returns before self.name is set.
        Otherwise self.name is set to givenName.
        """
        # The base class gets set up before anything else
        Bible.__init__( self )
        self.objectNameString = 'Greek NT Bible object'
        self.objectTypeString = 'GreekNT'

        # Remember our parameters
        self.sourceFilepath = sourceFilepath
        self.givenName = givenName
        self.encoding = encoding

        # None of these are known yet
        self.title = None
        self.version = None
        self.date = None
        self.XMLTree = None
        self.header = None
        self.frontMatter = None
        self.divs = None
        self.divTypesString = None
        self.lang = None
        self.language = None

        # Preliminary readability check on what we were given
        self.possibleFilenames = []
        if os.path.isdir( self.sourceFilepath ): # It's a folder -- note the readable .txt files in it
            for candidateFilename in os.listdir( self.sourceFilepath ):
                if not candidateFilename.lower().endswith('.txt'):
                    continue
                candidateFilepath = os.path.join( self.sourceFilepath, candidateFilename )
                if os.access( candidateFilepath, os.R_OK ):
                    self.possibleFilenames.append( candidateFilename )
        elif not os.access( self.sourceFilepath, os.R_OK ):
            logging.critical( "GreekNT: File {!r} is unreadable".format( self.sourceFilepath ) )
            return # No use continuing

        self.name = self.givenName
    # end of __init__

    #def x__str__( self ):
        #"""
        #This method returns the string
representation of a Bible book code. #@return: the name of a Bible object formatted as a string #@rtype: string #""" #result = "Greek Bible converter object" ##if self.title: result += ('\n' if result else '') + self.title ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version ) ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date ) #if len(self.books)==1: #for BBB in self.books: break # Just get the first one #result += ('\n' if result else '') + " " + _("Contains one book: {}").format( BBB ) #else: result += ('\n' if result else '') + " " + _("Number of books = {}").format( len(self.books) ) #return result ## end of __str__ def loadBooks( self ): """ """ if BibleOrgSysGlobals.verbosityLevel > 2: print( "Loading Greek NT from {}…".format( self.sourceFilepath ) ) for BBB in Greek.morphgntBooks: self.loadBook( BBB, Greek.morphgntFilenames[BBB] ) if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} books loaded.".format( len(self.books) ) ) #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book #for filename in self.possibleFilenames: #pathname = os.path.join( self.sourceFilepath, filename ) #self.loadBook( pathname ) #else: # most often we have all the Bible books in one file #self.loadFile( self.sourceFilepath ) self.doPostLoadProcessing() # end of loadBooks def load( self ): self.loadBooks() def loadBook( self, BBB, filename, encoding='utf-8' ): def unpackLine( line ): # Should be seven parts in the line # 0 book/chapter/verse # 1 part of speech (POS) # 2 parsing code # 3 text (including punctuation) # 4 word (with punctuation stripped) # 5 normalized word # 6 lemma # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος # 180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή # 180102 P- -------- κατ’ κατ’ κατά κατά # 180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία bits = line.split() assert len(bits) == 7 #print( bits ) bn, cn, vn = bits[0][0:2], 
bits[0][2:4], bits[0][4:6] if bn[0]=='0': bn = bn[1:] # Remove any leading zero if cn[0]=='0': cn = cn[1:] # Remove any leading zero if vn[0]=='0': vn = vn[1:] # Remove any leading zero #print( b, c, v ) POSCode = bits[1] assert len(POSCode) == 2 assert POSCode in Greek.POSCodes.keys() parsingCode = bits[2] assert len(parsingCode) == 8 #print( parsingCode ) for j,char in enumerate(parsingCode): assert char in Greek.parsingCodes[j] assert parsingCode[0] in Greek.personCodes assert parsingCode[1] in Greek.tenseCodes assert parsingCode[2] in Greek.voiceCodes assert parsingCode[3] in Greek.modeCodes assert parsingCode[4] in Greek.caseCodes assert parsingCode[5] in Greek.numberCodes assert parsingCode[6] in Greek.genderCodes assert parsingCode[7] in Greek.degreeCodes return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],) # end of unpackLine self.thisBook = BibleBook( self, BBB ) self.thisBook.objectNameString = 'Morph Greek NT Bible Book object' self.thisBook.objectTypeString = 'MorphGNT' filepath = os.path.join( self.sourceFilepath, filename ) if BibleOrgSysGlobals.verbosityLevel > 2: print( " Loading {}…".format( filename ) ) lastLine, lineCount = '', 0 lastC = lastV = None with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done if 1: #try: for line in myFile: lineCount += 1 if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) ) line = line[1:] # Remove the Unicode Byte Order Marker (BOM) if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character #if not line: continue # Just discard blank lines lastLine = line #print ( 'gNT file line is "' + line + '"' ) #if line[0]=='#': continue # Just discard comment lines unpackedLine = unpackLine( line ) #print( unpackedLine ) ref, grammar, words = unpackedLine bn, cn, vn = ref POSCode, parsingCode = grammar word1, 
word2, word3, word4 = words if cn != lastC: self.thisBook.addLine( 'c', cn ) lastC, lastV = cn, None if vn != lastV: self.thisBook.addLine( 'v', vn ) lastV = vn self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) ) self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) ) #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference #lineTuples.append( (reference,bits[1],bits[2],) ) #print( reference,bits[1],bits[2] ); halt #if 0: #except: #logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) ) #if lineCount > 1: print( 'Previous line was: ', lastLine ) #else: print( 'Possible encoding error -- expected', encoding ) if self.thisBook: if BibleOrgSysGlobals.verbosityLevel > 3: print( " {} words loaded from {}".format( len(self.thisBook), filename ) ) self.stashBook( self.thisBook ) #self.books[BBB] = self.thisBook # end of loadBook def analyzeWords( self ): """ Go through the NT data and do some filing and sorting of the Greek words. Used by the interlinearizer app. 
""" if BibleOrgSysGlobals.verbosityLevel > 3: print( "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) ) self.wordCounts = {} # Wordcount organised by BBB self.wordCounts['Total'] = 0 self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {} for BBB in self.books: wordCount = len(self.books[BBB]) self.wordCounts[BBB] = wordCount self.wordCounts['Total'] += wordCount if BibleOrgSysGlobals.verbosityLevel > 3: print( " analyzeWords: {} has {} Greek words".format( BBB, wordCount ) ) for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words # File the actual words if actualWord not in self.actualWordsToNormalized: self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)] #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] ) else: # we've already had this word before previous = self.actualWordsToNormalized[actualWord] #print( "had", actualWord, "before with", previous, "now with", reference, normalizedWord ) found = changed = False newList = [] for oldRefList,oldnormalizedWord in previous: #print( " oRL", oldRefList, "oP", oldnormalizedWord ) if normalizedWord == oldnormalizedWord: assert not found if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldnormalizedWord,) ) changed = True found = True else: newList.append( (oldRefList,oldnormalizedWord,) ) if not found: #print( " Found a new", normalizedWord, "normalized word for", actualWord, "was", previous ) newList.append( ([reference],normalizedWord,) ) changed = True if changed: self.actualWordsToNormalized[actualWord] = newList #print( " now have", newList ) # File the normalized words if normalizedWord not in self.normalizedWordsToActual: self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)] #print( "Saved", normalizedWord, "with", 
self.normalizedWordsToActual[normalizedWord] ) else: # we've already had this word before previous = self.normalizedWordsToActual[normalizedWord] #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord ) found = changed = False newList = [] for oldRefList,oldActualWord in previous: #print( " oRL", oldRefList, "oP", oldActualWord ) if actualWord == oldActualWord: assert not found if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldActualWord,) ) changed = True found = True else: newList.append( (oldRefList,oldActualWord,) ) if not found: newList.append( ([reference],actualWord,) ) changed = True if changed: self.normalizedWordsToActual[normalizedWord] = newList #print( " now have", newList ) if normalizedWord not in self.normalizedWordsToParsing: self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)] #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] ) else: # we've already had this word before previous = self.normalizedWordsToParsing[normalizedWord] #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing ) found = changed = False newList = [] for oldRefList,oldParsing in previous: #print( " oRL", oldRefList, "oP", oldParsing ) if parsing == oldParsing: assert not found if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldParsing,) ) changed = True found = True else: newList.append( (oldRefList,oldParsing,) ) if not found: newList.append( ([reference],parsing,) ) changed = True if changed: self.normalizedWordsToParsing[normalizedWord] = newList #print( " now have", newList ) # File the self.lemmasToNormalizedWords if lemma not in self.lemmasToNormalizedWords: self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)] #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] ) else: # we've already had this word before previous = 
self.lemmasToNormalizedWords[lemma] #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord ) found = changed = False newList = [] for oldRefList,oldnormalizedWord in previous: #print( " oRL", oldRefList, "oP", oldnormalizedWord ) if normalizedWord == oldnormalizedWord: assert not found if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldnormalizedWord,) ) changed = True found = True else: newList.append( (oldRefList,oldnormalizedWord,) ) if not found: newList.append( ([reference],normalizedWord,) ) changed = True if changed: self.lemmasToNormalizedWords[lemma] = newList #print( " now have", newList ) if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) ) if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) ) if BibleOrgSysGlobals.verbosityLevel > 3: for j,aW in enumerate( self.actualWordsToNormalized.keys() ): print( " ", aW, self.actualWordsToNormalized[aW] ) if j==6: break if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) ) if BibleOrgSysGlobals.verbosityLevel > 3: for j,nW in enumerate( self.normalizedWordsToActual.keys() ): print( " ", nW, self.normalizedWordsToActual[nW] ) if j==6: break if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) ) if BibleOrgSysGlobals.verbosityLevel > 3: for j,nW in enumerate( self.normalizedWordsToParsing.keys() ): print( " ", nW, self.normalizedWordsToParsing[nW] ) if j==6: break if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) ) if BibleOrgSysGlobals.verbosityLevel > 3: for j,lem in enumerate( 
self.lemmasToNormalizedWords.keys() ): print( " ", lem, self.lemmasToNormalizedWords[lem] ) if j==6: break if 0: print( "The following actual words have multiple normalized forms:" ) for j,aW in enumerate( self.actualWordsToNormalized.keys() ): if len(self.actualWordsToNormalized[aW])>1: print( " ", aW ) for entry in self.actualWordsToNormalized[aW]: print( " ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )
def load(self):
    """
    Load a single source file and load book elements.

    The first non-skipped line decides the verse-per-line (VPL) flavour:
        1: "<bookCode> <chapter>:<verse> <text>" (space separated)
        2: "<8-digit BBCCCVVV reference><tab><text>"
        3: as type 2, but preceded by "# name: value" header lines,
            which are collected and passed on as the 'VPL' supplied metadata.

    Each completed book is handed to self.stashBook(), and
        self.doPostLoadProcessing() is called at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    # The organisational systems are module-level caches which are created on first use
    global BOS66, BOS81, BOSx
    if BOS66 is None: BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None: BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None: BOSx = BibleOrganizationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # Type 3 header line prefixes and the settings key that each one is saved under
    headerFields = (
        ('# language_name:', 'LanguageName'),
        ('# closest ISO 639-3:', 'ISOLanguageCode'),
        ('# year_short:', 'Year.short'),
        ('# year_long:', 'Year.long'),
        ('# title:', 'WorkTitle'),
        ('# URL:', 'URL'),
        ('# copyright_short:', 'Copyright.short'),
        ('# copyright_long:', 'Copyright.long'),
    )
    # Book numbers above 66 in type 2/3 references
    bnDict = {
        67: 'TOB', 68: 'JDT', 69: 'ESG', 70: 'WIS', 71: 'SIR',
        72: 'BAR', 73: 'LJE', 74: 'PAZ', 75: 'SUS', 76: 'BEL',
        77: 'MA1', 78: 'MA2', 79: 'MA3', 80: 'MA4', 81: 'ES1',
        82: 'ES2', 83: 'MAN', 84: 'PS2', 85: 'PSS', 86: 'ODE',
    }

    lineCount = 0
    vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = None
    lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n': line = line[:-1]  # Removing trailing newline character
            if not line: continue  # Just discard blank lines

            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == chr(65279):  #U+FEFF or \ufeff
                    logging.info(" VPLBible.load: Detected Unicode Byte Order Marker (BOM)")
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                # Try to identify the VPL type
                # NOTE(review): the type 1 pattern accepts '.' as well as ':' between chapter and verse,
                #   but the type 1 parsing below only understands ':' -- confirm which is intended
                match = re.search('^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line)
                if match: vplType = 1
                else:
                    match = re.search('^(\\d{8})\\s', line)
                    if match: vplType = 2
                    else:
                        match = re.search('^# language_name:\\s', line)
                        if match: vplType = 3
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print("First line got type #{} {!r} match from {!r}".format(vplType, match.group(0), line))
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print("VPLBible.load: (unexpected) first line was {!r} in {}".format(line, self.sourceFilepath))
                    # 'halt' is not defined in this view -- presumably a deliberate way to stop when debugging
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue

            # Process header stuff (every '#' line in a type 3 file is consumed here)
            if vplType == 3 and line[0] == '#':
                for prefix, settingsKey in headerFields:
                    if line.startswith(prefix):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available':
                            settingsDict[settingsKey] = string
                        break
                else:  # no known header prefix matched
                    logging.warning("VPLBible.load {} is skipping unknown line: {}".format(vplType, line))
                continue

            # Process the main segment
            pendingPsalmTitle = None  # Only written once the book and chapter for this line exist
            if vplType == 1:
                bits = line.split(' ', 2)
                if len(bits) != 3 or ':' not in bits[1]:
                    # Skip explicitly, rather than falling through with the fields of the previous line
                    logging.error("VPLBible.load: Skipping malformed line {} in {}: {!r}".format(lineCount, self.givenName, line))
                    continue
                bookCodeText, CVString, vText = bits
                # Any second colon stays in the verse field and so fails the isdigit check below
                chapterNumberString, _separator, verseNumberString = CVString.partition(':')
                if chapterNumberString == '': chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL

                if BibleOrgSysGlobals.debugFlag:
                    assert 2 <= len(bookCodeText) <= 4
                    assert chapterNumberString.isdigit()
                if not verseNumberString.isdigit():
                    logging.error("Invalid verse number field at {}/{} {}:{!r}".format(bookCodeText, BBB, chapterNumberString, verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert verseNumberString.isdigit()
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    # Ambiguous abbreviations are resolved using the preceding book
                    if bookCodeText == 'Le' and lastBBB == 'GEN': BBB = 'LEV'
                    elif bookCodeText in ('Jud', ) and lastBBB == 'JOS': BBB = 'JDG'
                    else:
                        BBB = BOS66.getBBBFromText(bookCodeText)  # Try to guess
                        if not BBB: BBB = BOS81.getBBBFromText(bookCodeText)  # Try to guess
                        if not BBB: BBB = BOSx.getBBBFromText(bookCodeText)  # Try to guess
                        if not BBB: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromText(bookCodeText)  # Try to guess
                    if not BBB:
                        logging.critical("VPL Bible: Unable to determine book code from text {!r} after {!r}={}".format(bookCodeText, lastBookCodeText, lastBBB))
                        halt

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons» are for the Titles in the Book of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                            .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0] == '«':
                    if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                        # The title is only remembered here, because the book object and the
                        #   chapter line for this verse aren't created until further below
                        pendingPsalmTitle, _separator, remainder = vText[1:].partition('»')
                        vText = remainder.lstrip()

                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2, 3):
                bits = line.split('\t', 1)
                if len(bits) != 2:
                    logging.error("VPLBible.load: Skipping line {} without a tab in {}: {!r}".format(lineCount, self.givenName, line))
                    continue
                bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                # Remove leading zeroes (but always leave the final digit)
                chapterNumberString = chapterNumberString.lstrip('0') or chapterNumberString[-1:]
                verseNumberString = verseNumberString.lstrip('0') or verseNumberString[-1:]
                bookCodeText, chapterNumber, verseNumber = int(bookNumberString), int(chapterNumberString), int(verseNumberString)
                # Close up the spaces around punctuation
                vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                            .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                            .replace('“ ','“').replace('( ','(').replace('[ ','[')

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    if 1 <= bookCodeText <= 66:
                        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(bookCodeText)
                    else:
                        BBB = bnDict[bookCodeText]

            else:
                logging.critical('Unknown VPL type {}'.format(vplType))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

            if bookCodeText:
                if bookCodeText != lastBookCodeText:  # We've started a new book
                    if lastBookCodeText is not None:  # Better save the last book
                        self.stashBook(thisBook)
                    if BBB:
                        if BBB in self:
                            logging.critical("Have duplicated {} book in {}".format(BBB, self.givenName))
                        if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'VPL Bible Book object'
                        thisBook.objectTypeString = 'VPL'
                        numChapters = len(BOSx.getNumVersesList(BBB))
                        lastBookCodeText = bookCodeText
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical("VPLBible{} could not figure out {!r} book code".format(vplType, bookCodeText))
                        if BibleOrgSysGlobals.debugFlag: halt

                if BBB:
                    if chapterNumber != lastChapterNumber:  # We've started a new chapter
                        if BibleOrgSysGlobals.debugFlag:
                            assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                        if chapterNumber == 0:
                            logging.info("Have chapter zero in {} {} {} {}:{}".format(self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                        elif chapterNumber > numChapters:
                            logging.error("Have high chapter number in {} {} {} {}:{} (expected max of {})".format(self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, numChapters))
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumber = chapterNumber
                        lastVerseNumber = -1

                    if pendingPsalmTitle is not None:
                        thisBook.addLine('d', pendingPsalmTitle)  # Psalm title

                    # Handle the verse info
                    if verseNumber == lastVerseNumber and vText == lastVText:
                        logging.warning(_("Ignored duplicate verse line in {} {} {} {}:{}").format(self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                        continue
                    # The verse is still added in both of these cases, despite the wording of the warnings
                    if verseNumber < lastVerseNumber:
                        logging.warning(_("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                    elif verseNumber == lastVerseNumber:  # with different text (identical text was handled above)
                        logging.warning(_("Ignored duplicated {} verse number in {} {} {} {}:{}").format(verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))

                    # Check for paragraph markers
                    if vText and vText[0] == '¶':
                        thisBook.addLine('p', '')
                        vText = vText[1:].lstrip()

                    thisBook.addLine('v', verseNumberString + ' ' + vText)
                    lastVText = vText
                    lastVerseNumber = verseNumber

            else:  # No bookCodeText yet
                logging.warning("VPLBible.load{} is skipping unknown pre-book line: {}".format(vplType, line))

    # Save the final book
    if thisBook is not None: self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
# end of load
def load( self ):
    """
    Load a single source file and load book elements.

    Each data line of self.sourceFilepath (read using self.encoding) is split at its
        first three commas into
            book number, chapter number, verse number, verse text
        where the book number is a reference number understood by
        Globals.BibleBooksCodes.getBBBFromReferenceNumber().
    Blank lines and lines starting with '#' are ignored.
    If the very first line is a '"Book",' header then the surrounding quote marks
        are removed from every field (a 'Book,' header means that there's none to remove).
    RTF escapes in the verse text are converted to the actual characters.

    Each completed book is handed to self.saveBook(), and
        self.doPostLoadProcessing() is called at the end.
    """
    def stripQuotes( field ):
        """
        Return the field without its surrounding pair of matching quote marks (if any).
        """
        if len(field)>=2 and field[0]==field[-1] and field[0] in '"\'':
            return field[1:-1]
        return field
    # end of stripQuotes

    if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    lineCount = 0
    BBB = thisBook = None
    lastBookNumber = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    quoted = None # Stays None (no quote marks removed) if the file has no header line
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines
            if line==' ': continue # Handle special case which has blanks on every second line -- HACK
            if line[0]=='#': continue # Just discard comment lines
            if lineCount==1:
                if line.startswith( '"Book",' ):
                    quoted = True
                    continue # Just discard header line
                elif line.startswith( 'Book,' ):
                    quoted = False
                    continue # Just discard header line

            bits = line.split( ',', 3 ) # The verse text comes last so it may contain commas
            if len(bits) != 4:
                # Skip explicitly, rather than carrying on with the fields of the previous line
                logging.error( "CSVBible.load: Skipping line {} with unexpected number of fields ({}) in {}: {!r}".format( lineCount, len(bits), self.givenName, line ) )
                continue
            if quoted: # Remove quote marks from these strings
                bits = [stripQuotes( field ) for field in bits]
            bString, chapterNumberString, verseNumberString, vText = bits

            bookNumber = int( bString )
            chapterNumber = int( chapterNumberString )
            verseNumber = int( verseNumberString )

            if bookNumber != lastBookNumber: # We've started a new book
                if lastBookNumber != -1: # Better save the last book
                    self.saveBook( thisBook )
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber ) # Try to guess
                assert( BBB )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = "CSV Bible Book object"
                thisBook.objectTypeString = "CSV"
                lastBookNumber = bookNumber
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Now we have to convert any possible RTF codes to our internal codes
            # First do special characters
            vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
            vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
            # Now do Unicode characters
            #   (the text is rescanned from the start after each replacement)
            while True: # Find patterns like \\'d3
                match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                if not match: break
                i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                vText = vText[:match.start()] + chr( i ) + vText[match.end():]
            while True: # Find patterns like \\u253?
                match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                if not match: break
                i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                vText = vText[:match.start()] + chr( i ) + vText[match.end():]

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                continue
            if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            # The verse is still added in both of these cases, despite the wording of the warnings
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber: # with different text (identical text was handled above)
                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )

            thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there won't be one if the file had no data lines)
    if thisBook is not None: self.saveBook( thisBook )
    self.doPostLoadProcessing()
# end of load
def load(self):
    """
    Load the compressed data file and import book elements.

    The whole file is read into memory, the fixed-size header records are
    walked to locate 66 zlib-compressed book blocks, and each block is then
    decompressed, split into "C:V text" lines and stashed as a BibleBook.

    Returns:
        dict: the raw blocks that were stepped over while parsing, keyed
        'block1', 'block2', 'block3', 'block3n' (only when the optional
        compressed name block is present), 'block4', 'block5', 'block9'.

    Raises:
        AssertionError: if the file does not have the expected layout.
    """
    import zlib

    def readFixedString(data, start, maxLength=32):
        """Return the characters of data[start:start+maxLength] that precede the first byte below 0x20."""
        chars = []
        for offset in range(maxLength):
            char8 = data[start + offset]
            if char8 < 0x20:
                break
            chars.append(chr(char8))
        return ''.join(chars)

    # Module abbreviation -> books known to contain out-of-order verses
    # (for these the verse-ordering check is skipped with a message).
    knownOutOfOrderVerses = {
        'rsv': ('EXO', 'HAG'),
        'gnt': ('ISA', 'ZEC', 'MRK'),
        'hcsb': ('SA2',),
        'msg': ('NUM', 'JDG', 'SA2', 'CH2', 'EZE', 'ACT'),
    }

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print(_("\nLoading {}…").format(self.sourceFilepath))
    with open(self.sourceFilepath, 'rb') as myFile:  # Automatically closes the file when done
        fileBytes = myFile.read()
    if BibleOrgSysGlobals.debugFlag:
        print(" {:,} bytes read".format(len(fileBytes)))

    keep = {}
    index = 0

    # Block 1: 32-byte header starting with the file-type string
    keep['block1'] = fileBytes[index:index + 32]
    hString = readFixedString(fileBytes, index)
    if BibleOrgSysGlobals.debugFlag:
        # Show whatever follows the header text inside the 32-byte block
        # (the scan stops at offset 31 at the latest).
        tailStart = min(len(hString), 31)
        print('block1b', hexlify(fileBytes[index + tailStart:index + 32]))
    # Skipped some (important?) binary here
    index += 32
    if BibleOrgSysGlobals.debugFlag:
        print('hString', repr(hString), index)
    assert hString == 'EasyWorship Bible Text'

    # Block 2: 56-byte block starting with the module name
    keep['block2'] = fileBytes[index:index + 56]
    nString = readFixedString(fileBytes, index)
    # Skipped some zeroes here
    index += 56
    if BibleOrgSysGlobals.debugFlag:
        print('nString', repr(nString), index)
    self.name = nString

    # 66 fixed-size book records: abbreviation, chapter/verse counts,
    # then the offset and length of the compressed book text.
    rawBooks = []
    for _bookNumber in range(1, 66 + 1):
        bookAbbrev = readFixedString(fileBytes, index)
        # Skipped some zeroes here
        index += 51
        if bookAbbrev and bookAbbrev[-1] == '.':
            bookAbbrev = bookAbbrev[:-1]  # Remove final period
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print('bookAbbrev', repr(bookAbbrev))

        numChapters = fileBytes[index]
        numVerses = [fileBytes[index + j + 1] for j in range(numChapters)]
        # Skipped some zeroes here
        index += 157
        if BibleOrgSysGlobals.debugFlag:
            print(' ', numChapters, numVerses)

        bookStart, = struct.unpack("<I", fileBytes[index:index + 4])
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag:
            print(' bookStart', bookStart)
        bookLength, = struct.unpack("<I", fileBytes[index:index + 4])
        # Skipped some zeroes here
        index += 8
        if BibleOrgSysGlobals.debugFlag:
            print(' bookLength', bookLength, bookStart + bookLength)

        bookBytes = fileBytes[bookStart:bookStart + bookLength]
        assert bookBytes[0] == 0x78 and bookBytes[1] == 0xda  # Zlib compression header
        rawBooks.append((bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes))

    # Block 3: optional length-prefixed, zlib-compressed module name
    if BibleOrgSysGlobals.debugFlag:
        print('unknown block3', index, hexlify(fileBytes[index:index + 30]))
    keep['block3'] = fileBytes[index:index + 30]
    length3, = struct.unpack("<I", fileBytes[index:index + 4])
    if length3:
        block3 = fileBytes[index + 4:index + 4 + length3 - 4]
        byteResult = zlib.decompress(block3)
        textResult = byteResult.decode('utf8')
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print("Got", len(textResult), textResult, 'from', length3)
        keep['block3n'] = textResult
        if self.name:
            print('Overwriting module name {!r} with {!r}'.format(self.name, textResult))
        self.name = textResult
    index += length3

    if BibleOrgSysGlobals.debugFlag:
        print('end of contents', index, hexlify(fileBytes[index:index + 60]))
    firstBookStart = rawBooks[0][3]
    keep['block4'] = firstBookStart
    block5 = fileBytes[index:firstBookStart]
    keep['block5'] = block5
    index += len(block5)
    assert index == firstBookStart  # Should now be at the start of the first book (already fetched above)
    assert len(rawBooks) == 66

    # Look at extra stuff at end (after the last book that was read)
    endBytes = fileBytes[bookStart + bookLength:]
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print('endBytes', len(endBytes), hexlify(endBytes), endBytes)
    assert len(endBytes) == 16
    keep['block9'] = endBytes
    # Skipped some binary and some text here
    del fileBytes  # No longer needed -- each book's bytes are held in rawBooks

    # Now we have to decode the book text (compressed about 4x with zlib)
    for j, BBB in enumerate(BOS.getBookList()):
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(' Decoding {}…'.format(BBB))
        bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
        byteResult = zlib.decompress(bookBytes)
        textResult = byteResult.decode('utf8')
        if '\t' in textResult:
            logging.warning("Replacing tab characters in {} = {}".format(BBB, bookAbbrev))
            textResult = textResult.replace('\t', ' ')
        if BibleOrgSysGlobals.strictCheckingFlag:
            # Every line is "C:V text", so a single space is always present;
            # the strict check can only be meant to reject doubled spaces.
            # NOTE(review): assumed intent -- confirm against the upstream file.
            assert '  ' not in textResult

        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'EasyWorship Bible Book object'
        thisBook.objectTypeString = 'EasyWorship Bible'
        if bookAbbrev:
            thisBook.addLine('toc3', bookAbbrev)

        C, V = '0', '-1'  # So id line starts at 0:0
        for line in textResult.split('\r\n'):
            if not line:
                continue  # skip blank lines
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print('Processing {} {} line: {!r}'.format(self.abbreviation, BBB, line))
            assert line[0].isdigit()
            assert ':' in line[:4]
            CV, verseText = line.split(' ', 1)
            newC, newV = CV.split(':')

            if newC != C:
                if self.abbreviation == 'hcsb' and BBB in ('SA2',):
                    # Handle a bad bug -- chapter 24 has verses out of order
                    print("Skipping error for out-of-order chapters in {}!".format(BBB))
                else:
                    assert int(newC) > int(C)
                C, V = newC, '0'
                thisBook.addLine('c', C)

            if self.abbreviation == 'TB' and BBB == 'JOL':
                # Handle a bug -- chapter 3 repeats
                if int(newV) < int(V):
                    break
            elif BBB in knownOutOfOrderVerses.get(self.abbreviation, ()):
                print("Skipping error for out-of-order verses in {} {}".format(self.abbreviation, BBB))
            else:
                try:
                    assert int(newV) > int(V)
                except ValueError:  # e.g. a verse "number" that is not purely digits
                    if BibleOrgSysGlobals.debugFlag:
                        print("Something's not an integer around {} {}:{} {}".format(BBB, C, V, verseText))
            V = newV
            thisBook.addLine('v', V + ' ' + verseText)

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("Saving", BBB)
        self.stashBook(thisBook)

    self.doPostLoadProcessing()
    return keep
def load( self ):
    """
    Load a single tab-separated source file and load book elements.

    Lines starting with '#' carry "field<TAB>value" meta-data which is stored
    as attributes on this object.  Every other line has 4, 6 or 9
    tab-separated fields ending with the verse text; the book, chapter and
    verse fields drive the creation of BibleBook objects which are saved
    as each new book code is encountered.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    # Meta-data fields that are copied straight onto same-named attributes
    knownMetadataFields = ( 'name', 'filetype', 'copyright', 'abbreviation', 'language', 'note', 'columns', )

    BBB = None
    thisBook = None # Stays None if the file contains no verse lines at all
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    # Pre-set so that the diagnostics for a bad first data line can be formatted
    bookCode = chapterNumberString = verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ''
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines

            if line[0]=='#':
                hashBits = line[1:].split( '\t' )
                if len(hashBits)==2 and hashBits[1]: # We have some valid meta-data
                    fieldName, fieldValue = hashBits
                    # Should some of these be placed into self.settingsDict???
                    if fieldName in knownMetadataFields: setattr( self, fieldName, fieldValue )
                    else: # Only complain about fields that we did NOT recognise
                        logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( fieldName, fieldValue ) )
                continue # Just discard comment lines

            bits = line.split( '\t' )
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
            elif len(bits) == 9:
                NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
            elif len(bits) == 1 and self.givenName.startswith( 'lxx_a_parsing_' ):
                logging.warning( _("Skipping bad {!r} line in {} {} {} {}:{}").format( line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            else:
                print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                halt # Deliberately undefined name: aborts the load with a NameError

            if NRSVA_bookCode: assert( len(NRSVA_bookCode) == 3 )
            if NRSVA_chapterNumberString: assert( NRSVA_chapterNumberString.isdigit() )
            if NRSVA_verseNumberString: assert( NRSVA_verseNumberString.isdigit() )

            if not bookCode and not chapterNumberString and not verseNumberString:
                print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert( len(bookCode) == 3 )
                assert( chapterNumberString.isdigit() )
                assert( verseNumberString.isdigit() )

            if subverseNumberString:
                logging.warning( _("subverseNumberString {!r} in {} {} {}:{}").format( subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString ) )

            vText = vText.strip() # Remove leading and trailing spaces
            if not vText: continue # Just ignore blank verses I think
            if vText == '+': continue # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int( chapterNumberString )
            verseNumber = int( verseNumberString )
            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag: assert( sequenceNumberString.isdigit() )
                sequenceNumber = int( sequenceNumberString )
                if BibleOrgSysGlobals.debugFlag:
                    assert( sequenceNumber > lastSequence or \
                        self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) ) # Why???
                lastSequence = sequenceNumber

            if bookCode != lastBookCode: # We've started a new book
                if lastBookCode != -1: # Better save the last book
                    self.saveBook( thisBook )
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode( bookCode )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = 'Unbound Bible Book object'
                thisBook.objectTypeString = 'Unbound'
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            # The following only log -- the verse line is still added below
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                else:
                    logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (if the file actually contained one)
    if thisBook is not None:
        self.saveBook( thisBook )
    self.doPostLoadProcessing()
def loadBook( self, bookElement ):
    """
    Load the book container from the XML data file.

    bookElement must be a 'book' element.  Its child elements are converted
    one by one into marker/text lines appended to a new BibleBook (held in
    self.thisBook), which is saved once every child has been processed.
    Footnotes, cross-references, figures, tables, character formatting and
    paragraphs are delegated to the other load* methods of this object.
    """
    if Globals.verbosityLevel > 3: print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
    assert( bookElement.tag == 'book' )
    mainLocation = self.name + " USFX book"

    # Process the attributes first
    bookCode = None
    for attrib,value in bookElement.items():
        if attrib == 'id':
            bookCode = value
        else:
            logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
    BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
    mainLocation = "{} USFX {} book".format( self.name, BBB )
    if Globals.verbosityLevel > 2: print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
    Globals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
    Globals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

    # Now create our actual book
    self.thisBook = BibleBook( self, BBB )
    self.thisBook.objectNameString = "USFX XML Bible Book object"
    self.thisBook.objectTypeString = "USFX"

    C = V = '0'
    for element in bookElement:
        tag = element.tag
        location = "{} of {} {}:{}".format( tag, mainLocation, C, V )

        if tag == 'id':
            idText = clean( element.text )
            Globals.checkXMLNoTail( element, location, 'vsg3' )
            Globals.checkXMLNoSubelements( element, location, 'ksq2' )
            for attrib,value in element.items():
                if attrib == 'id':
                    assert( value == bookCode )
                else:
                    logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )

        elif tag == 'ide':
            ideText = clean( element.text )
            Globals.checkXMLNoTail( element, location, 'jsa0' )
            Globals.checkXMLNoSubelements( element, location, 'ls01' )
            charset = None
            for attrib,value in element.items():
                if attrib == 'charset': charset = value
                else:
                    logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            if charset is None:
                # A missing attribute used to crash on None + str -- keep whatever text there is
                logging.error( "USFXXMLBible.loadBook: Missing charset attribute in {}".format( location ) )
                ideLine = ideText if ideText else ''
            else:
                ideLine = charset + ((' '+ideText) if ideText else '')
            self.thisBook.appendLine( 'ide', ideLine )

        elif tag == 'h':
            hText = element.text
            Globals.checkXMLNoTail( element, location, 'dj35' )
            Globals.checkXMLNoAttributes( element, location, 'hs35' )
            Globals.checkXMLNoSubelements( element, location, 'hs32' )
            self.thisBook.appendLine( 'h', clean(hText) )

        elif tag == 'toc':
            tocText = element.text
            Globals.checkXMLNoTail( element, location, 'ss13' )
            Globals.checkXMLNoSubelements( element, location, 'js13' )
            level = None
            for attrib,value in element.items():
                if attrib == 'level': # Seems compulsory
                    level = value
                else:
                    logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            if level is None:
                # A missing attribute used to crash on str + None.
                # NOTE(review): falling back to level 1 is an assumption -- confirm against the USFX schema.
                logging.error( "USFXXMLBible.loadBook: Missing level attribute in {} -- assuming level 1".format( location ) )
                level = '1'
            self.thisBook.appendLine( 'toc'+level, clean(tocText) )

        elif tag == 'c':
            Globals.checkXMLNoText( element, location, 'ks35' )
            Globals.checkXMLNoTail( element, location, 'gs35' )
            Globals.checkXMLNoSubelements( element, location, 'kdr3' )
            # This is a milestone
            for attrib,value in element.items():
                if attrib == 'id': C, V = value, '0'
                else:
                    logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.appendLine( 'c', C )

        elif tag == 's':
            sText = clean( element.text )
            Globals.checkXMLNoTail( element, location, 'wxg0' )
            level = None
            for attrib,value in element.items():
                if attrib == 'level': # Seems optional
                    level = value
                else:
                    logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            marker = 's'
            if level: marker += level
            self.thisBook.appendLine( marker, sText )
            # Section headings may contain notes, figures, tables and character formatting
            for subelement in element:
                subtag = subelement.tag
                sublocation = subtag + " of " + location
                if subtag == 'f':
                    self.loadFootnote( subelement, sublocation )
                elif subtag == 'x':
                    self.loadCrossreference( subelement, sublocation )
                elif subtag == 'fig':
                    self.loadFigure( subelement, sublocation )
                elif subtag == 'table':
                    self.loadTable( subelement, sublocation )
                elif subtag in ('add','it','bd','bdit','sc',):
                    self.loadCharacterFormatting( subelement, sublocation )
                elif subtag == 'optionalLineBreak':
                    print( "What is loadBook optionalLineBreak?" )
                else:
                    logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subtag, BBB, C, V, sublocation ) )

        elif tag in ('p','q','d',):
            # loadParagraph returns the verse number reached, which is used in later locations
            V = self.loadParagraph( element, location, C )

        elif tag == 'b':
            Globals.checkXMLNoText( element, location, 'ks35' )
            Globals.checkXMLNoTail( element, location, 'gs35' )
            Globals.checkXMLNoAttributes( element, location, 'nd04' )
            Globals.checkXMLNoSubelements( element, location, 'kdr3' )
            self.thisBook.appendLine( 'b', '' )

        elif tag in ('cl','cp'): # Simple single-line paragraph-level markers
            marker, text = tag, clean(element.text)
            Globals.checkXMLNoTail( element, location, 'od01' )
            Globals.checkXMLNoAttributes( element, location, 'us91' )
            Globals.checkXMLNoSubelements( element, location, 'gd92' )
            self.thisBook.appendLine( marker, text )

        elif tag == 'table':
            self.loadTable( element, location )

        else:
            logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( tag, BBB, C, V, location ) )
            #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
            if Globals.debugFlag: halt # Deliberately undefined name: aborts with a NameError when debugging

    self.saveBook( self.thisBook )
def load( self ):
    """
    Load a single e-Sword (SQLite) source file and load book elements.

    The settings are read from the 'Details' table into self.settingsDict
    and used to decide which testament(s) to expect.  The 'Bible' table is
    then queried verse by verse, following the chapter/verse counts of the
    GENERIC-KJV-66-ENG organizational system, and each verse found is passed
    to self.handleLine().  Books that yielded at least one verse are saved.
    Problems noticed on the way are logged and also recorded in
    self.errorDictionary['Load Errors'].
    """
    if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
    loadErrors = []

    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
    elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
        logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

    def startBook( BBB ):
        """ Create an empty book for BBB and return it with its verse counts and reference number. """
        newBook = BibleBook( self.name, BBB )
        newBook.objectNameString = "e-Sword Bible Book object"
        newBook.objectTypeString = "e-Sword"
        verseList = BOS.getNumVersesList( BBB )
        return newBook, verseList, len(verseList), verseList[0], Globals.BibleBooksCodes.getReferenceNumber( BBB )

    connection = sqlite3.connect( self.sourceFilepath )
    try: # Make sure that the database gets closed again whatever happens
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()
        try:
            # First get the settings
            cursor.execute( 'select * from Details' )
            row = cursor.fetchone()
            for key in row.keys(): self.settingsDict[key] = row[key]
            if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
            if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
            if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

            # Just get some information from the file
            cursor.execute( 'select * from Bible' )
            rows = cursor.fetchall()
            numRows = len(rows)
            if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
            BBBn1 = rows[0][0]
            if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
            del rows # Takes a lot of memory
            BBB1 = None
            if BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )

            # Work out what we expect to find from the testament flags
            testament = BBB = None
            booksExpected = textLineCountExpected = 0
            if self.settingsDict['OT'] and self.settingsDict['NT']:
                testament, BBB = 'BOTH', 'GEN'
                booksExpected, textLineCountExpected = 66, 31102
            elif self.settingsDict['OT']:
                testament, BBB = 'OT', 'GEN'
                booksExpected, textLineCountExpected = 39, 23145
            elif self.settingsDict['NT']:
                testament, BBB = 'NT', 'MAT'
                booksExpected, textLineCountExpected = 27, 7957
            elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
                logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                testament, BBB = 'BOTH', 'GEN'
                booksExpected, textLineCountExpected = 66, 31102
            elif self.settingsDict['Apocrypha']: # incomplete
                testament, BBB = 'AP', 'XXX'
                booksExpected, textLineCountExpected = 99, 999999
                halt # Deliberately undefined name: aborts with a NameError (not handled yet)
            if not BBB:
                logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )

            if Globals.debugFlag or Globals.verbosityLevel>2:
                print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
            if BBB1 != BBB:
                logging.critical( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
                loadErrors.append( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
                if not BBB: BBB = BBB1
            if numRows != textLineCountExpected:
                logging.critical( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
                loadErrors.append( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )

            BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

            # Create the first book
            thisBook, verseList, numC, numV, nBBB = startBook( BBB )
            C = V = 1

            bookCount = 0
            ourGlobals = {}
            ourGlobals['haveParagraph'] = False
            haveLines = False
            while True:
                cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
                try: row = cursor.fetchone()
                except sqlite3.Error: row = None # Treat an unreadable row like a missing one
                line = None if row is None else row[0] # No row means that this reference is missing

                if line is None: logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
                elif not isinstance( line, str ): # Can't do anything with non-text -- give up on the whole module
                    if 'encryption' in self.settingsDict:
                        logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                    else:
                        logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                    break
                elif not line: logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    # (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        if Globals.debugFlag:
                            logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                        line = line.rstrip( '\r\n' ) # Remove CR/LFs from the end
                        line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

                    self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

                # Step on to the next verse / chapter / book
                V += 1
                if V > numV:
                    C += 1
                    if C > numC: # Save this book now
                        if haveLines:
                            if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                            self.saveBook( thisBook )
                        bookCount += 1 # Not the number saved but the number we attempted to process
                        if bookCount >= booksExpected: break
                        BBB = BOS.getNextBookCode( BBB )
                        # Create the next book
                        thisBook, verseList, numC, numV, nBBB = startBook( BBB )
                        haveLines = False
                        C = V = 1
                    else: # next chapter only
                        numV = verseList[C-1]
                        V = 1

                    if ourGlobals['haveParagraph']:
                        thisBook.appendLine( 'p', '' )
                        ourGlobals['haveParagraph'] = False

            if Globals.strictCheckingFlag or Globals.debugFlag:
                self.checkForExtraMaterial( cursor, BOS )
        finally:
            cursor.close()
    finally:
        connection.close()

    if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
    self.doPostLoadProcessing()
class GreekNT( Bible ): """ Class for handling a Greek NT object (which may contain one or more Bible books) Note: BBB is used in this class to represent the three-character referenceAbbreviation. """ def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ): """ Constructor: expects the filepath of the source folder. Loads (and crudely validates the file(s)) into ???. """ # Setup and initialise the base class first Bible.__init__( self ) self.objectNameString = "Greek NT Bible object" self.objectTypeString = "GreekNT" # Now we can set our object variables self.sourceFilepath, self.givenName, self.encoding = sourceFilepath, givenName, encoding self.title = self.version = self.date = None self.tree = self.header = self.frontMatter = self.divs = self.divTypesString = None #self.bkData, self.USFMBooks = OrderedDict(), OrderedDict() self.lang = self.language = None # Do a preliminary check on the readability of our files self.possibleFilenames = [] if os.path.isdir( self.sourceFilepath ): # We've been given a folder -- see if we can find the files # There's no standard for OSIS xml file naming fileList = os.listdir( self.sourceFilepath ) #print( len(fileList), fileList ) # First try looking for OSIS book names for filename in fileList: if filename.lower().endswith('.txt'): thisFilepath = os.path.join( self.sourceFilepath, filename ) #if Globals.debugFlag: print( "Trying {}...".format( thisFilepath ) ) if os.access( thisFilepath, os.R_OK ): # we can read that file self.possibleFilenames.append( filename ) elif not os.access( self.sourceFilepath, os.R_OK ): logging.critical( "GreekNT: File '{}' is unreadable".format( self.sourceFilepath ) ) return # No use continuing #print( self.possibleFilenames ); halt self.name = self.givenName #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML #gNTfc.loadMorphGNT() #self.books = gNTfc.bookData # end of __init__ #def x__str__( self ): #""" #This method returns the string representation of a 
Bible book code. #@return: the name of a Bible object formatted as a string #@rtype: string #""" #result = "Greek Bible converter object" ##if self.title: result += ('\n' if result else '') + self.title ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version ) ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date ) #if len(self.books)==1: #for BBB in self.books: break # Just get the first one #result += ('\n' if result else '') + " " + _("Contains one book: {}").format( BBB ) #else: result += ('\n' if result else '') + " " + _("Number of books = {}").format( len(self.books) ) #return result ## end of __str__ def load( self ): if Globals.verbosityLevel > 2: print( "Loading Greek NT from {}...".format( self.sourceFilepath ) ) for BBB in Greek.morphgntBooks: self.loadBook( BBB, Greek.morphgntFilenames[BBB] ) if Globals.verbosityLevel > 3: print( "{} books loaded.".format( len(self.books) ) ) #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book #for filename in self.possibleFilenames: #pathname = os.path.join( self.sourceFilepath, filename ) #self.loadBook( pathname ) #else: # most often we have all the Bible books in one file #self.loadFile( self.sourceFilepath ) self.doPostLoadProcessing() # end of load def loadBook( self, BBB, filename, encoding='utf-8' ): def unpackLine( line ): # Should be seven parts in the line # 0 book/chapter/verse # 1 part of speech (POS) # 2 parsing code # 3 text (including punctuation) # 4 word (with punctuation stripped) # 5 normalized word # 6 lemma # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος # 180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή # 180102 P- -------- κατ’ κατ’ κατά κατά # 180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία bits = line.split() assert( len(bits) == 7 ) #print( bits ) bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6] if bn[0]=='0': bn = bn[1:] # Remove any leading zero if cn[0]=='0': cn = 
cn[1:] # Remove any leading zero if vn[0]=='0': vn = vn[1:] # Remove any leading zero #print( b, c, v ) POSCode = bits[1] assert( len(POSCode) == 2 ) assert( POSCode in Greek.POSCodes.keys() ) parsingCode = bits[2] assert( len(parsingCode) == 8 ) #print( parsingCode ) for j,char in enumerate(parsingCode): assert( char in Greek.parsingCodes[j] ) assert( parsingCode[0] in Greek.personCodes ) assert( parsingCode[1] in Greek.tenseCodes ) assert( parsingCode[2] in Greek.voiceCodes ) assert( parsingCode[3] in Greek.modeCodes ) assert( parsingCode[4] in Greek.caseCodes ) assert( parsingCode[5] in Greek.numberCodes ) assert( parsingCode[6] in Greek.genderCodes ) assert( parsingCode[7] in Greek.degreeCodes ) return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],) # end of unpackLine self.thisBook = BibleBook( self.name, BBB ) self.thisBook.objectNameString = "Morph Greek NT Bible Book object" self.thisBook.objectTypeString = "MorphGNT" filepath = os.path.join( self.sourceFilepath, filename ) if Globals.verbosityLevel > 2: print( " Loading {}...".format( filename ) ) lastLine, lineCount = '', 0 lastC = lastV = None with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done if 1: #try: for line in myFile: lineCount += 1 if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF logging.info( "GreekNT: Detected UTF-16 Byte Order Marker in {}".format( filename ) ) line = line[1:] # Remove the UTF-8 Byte Order Marker if line[-1]=='\n': line = line[:-1] # Removing trailing newline character #if not line: continue # Just discard blank lines lastLine = line #print ( 'gNT file line is "' + line + '"' ) #if line[0]=='#': continue # Just discard comment lines unpackedLine = unpackLine( line ) #print( unpackedLine ) ref, grammar, words = unpackedLine bn, cn, vn = ref POSCode, parsingCode = grammar word1, word2, word3, word4 = words if cn != lastC: self.thisBook.appendLine( 'c', cn ) lastC, lastV = 
cn, None if vn != lastV: self.thisBook.appendLine( 'v', vn ) lastV = vn self.thisBook.appendLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) ) self.thisBook.appendLine( 'g', "{}/{}".format( POSCode, parsingCode ) ) #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference #lineTuples.append( (reference,bits[1],bits[2],) ) #print( reference,bits[1],bits[2] ); halt if 0: #except: logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) ) if lineCount > 1: print( 'Previous line was: ', lastLine ) else: print( 'Possible encoding error -- expected', encoding ) if self.thisBook: if Globals.verbosityLevel > 3: print( " {} words loaded from {}".format( len(self.thisBook), filename ) ) self.saveBook( self.thisBook ) #self.books[BBB] = self.thisBook # end of loadBook def xanalyzeWords( self ): """ Go through the NT data and do some filing and sorting of the Greek words. """ if Globals.verbosityLevel > 3: print( "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) ) self.wordCounts = {} # Wordcount organized by BBB self.wordCounts['Total'] = 0 self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {} for BBB in self.books: wordCount = len(self.books[BBB]) self.wordCounts[BBB] = wordCount self.wordCounts['Total'] += wordCount if Globals.verbosityLevel > 3: print( " analyzeWords: {} has {} Greek words".format( BBB, wordCount ) ) for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words # File the actual words if actualWord not in self.actualWordsToNormalized: self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)] #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] ) else: # we've already had this word before previous = self.actualWordsToNormalized[actualWord] #print( "had", actualWord, "before with", 
previous, "now with", reference, normalizedWord ) found = changed = False newList = [] for oldRefList,oldnormalizedWord in previous: #print( " oRL", oldRefList, "oP", oldnormalizedWord ) if normalizedWord == oldnormalizedWord: assert( not found ) if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldnormalizedWord,) ) changed = True found = True else: newList.append( (oldRefList,oldnormalizedWord,) ) if not found: #print( " Found a new", normalizedWord, "normalized word for", actualWord, "was", previous ) newList.append( ([reference],normalizedWord,) ) changed = True if changed: self.actualWordsToNormalized[actualWord] = newList #print( " now have", newList ) # File the normalized words if normalizedWord not in self.normalizedWordsToActual: self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)] #print( "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] ) else: # we've already had this word before previous = self.normalizedWordsToActual[normalizedWord] #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord ) found = changed = False newList = [] for oldRefList,oldActualWord in previous: #print( " oRL", oldRefList, "oP", oldActualWord ) if actualWord == oldActualWord: assert( not found ) if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldActualWord,) ) changed = True found = True else: newList.append( (oldRefList,oldActualWord,) ) if not found: newList.append( ([reference],actualWord,) ) changed = True if changed: self.normalizedWordsToActual[normalizedWord] = newList #print( " now have", newList ) if normalizedWord not in self.normalizedWordsToParsing: self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)] #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] ) else: # we've already had this word before previous = 
self.normalizedWordsToParsing[normalizedWord] #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing ) found = changed = False newList = [] for oldRefList,oldParsing in previous: #print( " oRL", oldRefList, "oP", oldParsing ) if parsing == oldParsing: assert( not found ) if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldParsing,) ) changed = True found = True else: newList.append( (oldRefList,oldParsing,) ) if not found: newList.append( ([reference],parsing,) ) changed = True if changed: self.normalizedWordsToParsing[normalizedWord] = newList #print( " now have", newList ) # File the self.lemmasToNormalizedWords if lemma not in self.lemmasToNormalizedWords: self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)] #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] ) else: # we've already had this word before previous = self.lemmasToNormalizedWords[lemma] #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord ) found = changed = False newList = [] for oldRefList,oldnormalizedWord in previous: #print( " oRL", oldRefList, "oP", oldnormalizedWord ) if normalizedWord == oldnormalizedWord: assert( not found ) if reference not in oldRefList: oldRefList.append( reference ) newList.append( (oldRefList,oldnormalizedWord,) ) changed = True found = True else: newList.append( (oldRefList,oldnormalizedWord,) ) if not found: newList.append( ([reference],normalizedWord,) ) changed = True if changed: self.lemmasToNormalizedWords[lemma] = newList #print( " now have", newList ) if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) ) if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) ) if Globals.verbosityLevel > 3: for j,aW in enumerate( self.actualWordsToNormalized.keys() ): print( " ", aW, 
self.actualWordsToNormalized[aW] ) if j==6: break if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) ) if Globals.verbosityLevel > 3: for j,nW in enumerate( self.normalizedWordsToActual.keys() ): print( " ", nW, self.normalizedWordsToActual[nW] ) if j==6: break if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) ) if Globals.verbosityLevel > 3: for j,nW in enumerate( self.normalizedWordsToParsing.keys() ): print( " ", nW, self.normalizedWordsToParsing[nW] ) if j==6: break if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) ) if Globals.verbosityLevel > 3: for j,lem in enumerate( self.lemmasToNormalizedWords.keys() ): print( " ", lem, self.lemmasToNormalizedWords[lem] ) if j==6: break if 0: print( "The following actual words have multiple normalized forms:" ) for j,aW in enumerate( self.actualWordsToNormalized.keys() ): if len(self.actualWordsToNormalized[aW])>1: print( " ", aW ) for entry in self.actualWordsToNormalized[aW]: print( " ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )
def load( self ):
    """
    Load a single verse-per-line (VPL) source file and stash one BibleBook
        object for each book found in it.

    The file flavour is detected from the first non-blank line:
        type 1: "BookCode C:V verse text" (space separated; "C.V" is also accepted)
        type 2: "BBCCCVVV<tab>verse text" (eight-digit numeric reference)
        type 3: as for type 2, but preceded by "# field: value" header lines
                    whose values are collected into self.suppliedMetadata['VPL'].

    Reads self.sourceFilepath using self.encoding, and uses (creating them
        if necessary) the module-level BOS66, BOS81 and BOSx systems.

    Malformed lines and lines from unrecognised books are logged and skipped.
    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

    # The organisational systems are shared at module level and only created on first use
    global BOS66, BOS81, BOSx
    if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
    if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
    if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # First-line patterns used to identify the VPL type (tried in this order)
    typePatterns = (
        ( 1, '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s' ),
        ( 2, '^(\\d{8})\\s' ),
        ( 3, '^# language_name:\\s' ),
        )
    # Type 3 header prefixes and the settings key that each value is saved under
    headerFields = (
        ( '# language_name:', 'LanguageName' ),
        ( '# closest ISO 639-3:', 'ISOLanguageCode' ),
        ( '# year_short:', 'Year.short' ),
        ( '# year_long:', 'Year.long' ),
        ( '# title:', 'WorkTitle' ),
        ( '# URL:', 'URL' ),
        ( '# copyright_short:', 'Copyright.short' ),
        ( '# copyright_long:', 'Copyright.long' ),
        )
    # Type 1 book codes that are mapped explicitly rather than guessed
    fixedBookCodes = { 'Le':'LEV', 'Jud':'JDG', 'So':'SNG', 'La':'LAM' }
    # Type 2/3 book numbers beyond the 66-book range
    bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                85:'PSS', 86:'ODE', }
    # Spaces around punctuation that get closed up in type 2/3 verse text (applied in this order)
    punctuationFixes = ( (' ,',','), (' .','.'), (' ;',';'), (' :',':'), (' !','!'), (' )',')'),
                            (' ]',']'), (' ”','”'), ('“ ','“'), ('( ','('), ('[ ','['), )

    lineCount = 0
    vplType = BBB = None
    haveSeenFirstLine = False
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    numChapters = 0
    settingsDict = {}
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line.endswith( '\n' ): line = line[:-1] # Removing trailing newline character
            if lineCount==1 and self.encoding.lower()=='utf-8' and line.startswith( chr(65279) ): #U+FEFF or \ufeff
                logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
            if not line: continue # Just discard blank lines

            if not haveSeenFirstLine: # Try to identify the VPL type
                # Done on the first non-blank line so that a leading blank line doesn't disable detection
                haveSeenFirstLine = True
                match = None
                for candidateType, pattern in typePatterns:
                    match = re.search( pattern, line )
                    if match:
                        vplType = candidateType
                        break
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print( "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 2: print( "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue

            # Process header stuff
            if vplType == 3 and line[0]=='#':
                for prefix, settingsKey in headerFields:
                    if line.startswith( prefix ):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available': settingsDict[settingsKey] = string
                        break
                else: # it didn't match any of the known header fields
                    logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                continue # Header and comment lines never contain verse data

            # Process the main segment
            psalmTitle = None # Held back so it can be written AFTER any new book and chapter are started
            if vplType == 1:
                bits = line.split( ' ', 2 )
                separator = None
                if len(bits) == 3: # The type detection accepts both C:V and C.V so we do too
                    if ':' in bits[1]: separator = ':'
                    elif '.' in bits[1]: separator = '.'
                if separator is None:
                    # Skip the line -- there are no valid fields to process (or even to report)
                    logging.error( "Unexpected number of bits in {} {} line {}: {} {}".format( self.givenName, BBB, lineCount, len(bits), bits ) )
                    continue
                bookCode, CVString, vText = bits
                chapterNumberString, verseNumberString = CVString.split( separator, 1 )

                if not bookCode: # e.g., the line started with a space
                    logging.warning( "VPLBible.load{} is skipping line with no book code: {}".format( vplType, line ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCode) <= 4
                if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                if not chapterNumberString.isdigit():
                    logging.error( "Invalid chapter number field at {}/{} {!r}:{}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                    continue
                if not verseNumberString.isdigit():
                    logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                    continue
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookCode != lastBookCode: # We've started a new book
                    BBB = fixedBookCodes.get( bookCode )
                    if not BBB: BBB = BOS66.getBBBFromText( bookCode ) # Try to guess
                    if not BBB: BBB = BOS81.getBBBFromText( bookCode ) # Try to guess
                    if not BBB: BBB = BOSx.getBBBFromText( bookCode ) # Try to guess

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons»  are for the Titles in the Book of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0]=='«' and BBB=='PSA' and verseNumberString=='1': # Psalm title
                    titleText, closingChevron, remainder = vText[1:].partition( '»' )
                    if closingChevron: # Leave the text untouched if the title is never closed
                        psalmTitle = titleText
                        vText = remainder.lstrip()

                # NOTE(review): '<' has already been replaced above, so this test can't succeed as written
                #   and verseNumber is only used for the sequence checks below -- confirm the intent.
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2,3):
                bits = line.split( '\t', 1 )
                if len(bits) != 2:
                    logging.error( "VPLBible.load{} is skipping line {} with no tab: {}".format( vplType, lineCount, line ) )
                    continue
                bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                # Remove leading zeroes (but keep a lone zero, and leave an empty field empty)
                chapterNumberString = chapterNumberString.lstrip( '0' ) or chapterNumberString[-1:]
                verseNumberString = verseNumberString.lstrip( '0' ) or verseNumberString[-1:]
                try:
                    bookCode, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                except ValueError:
                    logging.error( "VPLBible.load{} is skipping line {} with invalid reference {!r}".format( vplType, lineCount, bits[0] ) )
                    continue
                if bookCode == 0: # There's no book zero (and it'd be mistaken for having no book at all)
                    logging.error( "VPLBible.load{} is skipping line {} with invalid reference {!r}".format( vplType, lineCount, bits[0] ) )
                    continue

                vText = bits[1]
                for spacedForm, closedForm in punctuationFixes:
                    vText = vText.replace( spacedForm, closedForm )

                if bookCode != lastBookCode: # We've started a new book
                    if 1 <= bookCode <= 66: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookCode )
                    else: BBB = bnDict.get( bookCode ) # None (reported below) if we don't know this number

            else:
                logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )
                continue

            if bookCode != lastBookCode: # We've started a new book
                if thisBook is not None: # Better save the last book
                    self.stashBook( thisBook )
                    thisBook = None # so it can't get stashed again or be given lines from another book
                # Always move on (even if we can't identify the book) so that it's only reported once
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1
                if BBB:
                    if BBB in self:
                        logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                    if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'VPL Bible Book object'
                    thisBook.objectTypeString = 'VPL'
                    numChapters = len( BOSx.getNumVersesList( BBB ) )
                else:
                    logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCode ) )
                    if BibleOrgSysGlobals.debugFlag: halt

            if thisBook is None: continue # We're inside a book that couldn't be identified

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif chapterNumber > numChapters:
                    logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            if psalmTitle is not None: # Now it goes into the right book and after the chapter marker
                thisBook.addLine( 'd', psalmTitle ) # Psalm title

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                else:
                    logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

            # Check for paragraph markers
            if vText and vText[0]=='¶':
                thisBook.addLine( 'p', '' )
                vText = vText[1:].lstrip()

            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book
    if thisBook is not None: self.stashBook( thisBook )

    # Clean up
    if settingsDict:
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

    self.doPostLoadProcessing()
# end of load
def load( self ):
    """
    Load a single verse-per-line (VPL) source file and save one BibleBook
        object for each book found in it.

    Each data line is expected to look like "BookCode C:V verse text"
        (separated by single spaces). Blank lines and lines starting
        with '#' are ignored.

    Reads self.sourceFilepath using self.encoding.

    Malformed lines and lines from unrecognised books are logged and skipped.
    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    lineCount = 0
    BBB = None
    thisBook = None # Stays None until we find a book that we recognise
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount==1 and self.encoding.lower()=='utf-8' and line.startswith( chr(65279) ): #U+FEFF
                logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
            if line.endswith( '\n' ): line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines
            if line[0]=='#': continue # Just discard comment lines

            bits = line.split( ' ', 2 )
            if len(bits) != 3 or ':' not in bits[1]:
                # Skip the line -- there are no valid fields to process (or even to report)
                logging.error( "Unexpected number of bits in {} {} line {}: {} {}".format( self.givenName, BBB, lineCount, len(bits), bits ) )
                continue
            bookCode, CVString, vText = bits
            # Only split once: any extra colon then gets reported as an invalid verse number below
            chapterNumberString, verseNumberString = CVString.split( ':', 1 )

            if not bookCode and not chapterNumberString and not verseNumberString:
                print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            if BibleOrgSysGlobals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
            if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
            if not chapterNumberString.isdigit():
                logging.error( "Invalid chapter number field at {}/{} {!r}:{}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                continue
            if not verseNumberString.isdigit():
                logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert( verseNumberString.isdigit() )
                continue
            chapterNumber = int( chapterNumberString )
            verseNumber = int( verseNumberString )

            if bookCode != lastBookCode: # We've started a new book
                if thisBook is not None: # Better save the last book
                    self.saveBook( thisBook )
                    thisBook = None # so it can't get saved again or be given lines from another book
                # Always move on (even if we can't identify the book) so that it's only reported once
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB( bookCode ) # Try to guess
                if BBB:
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "VPL Bible Book object"
                    thisBook.objectTypeString = "VPL"
                else:
                    logging.critical( "VPLBible could not figure out {!r} book code".format( bookCode ) )
                    if BibleOrgSysGlobals.debugFlag: halt

            if thisBook is None: continue # We're inside a book that couldn't be identified

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle special formatting
            #   [brackets] are for Italicized words
            #   <brackets> are for the Words of Christ in Red
            #   «brackets»  are for the Titles in the Book of Psalms.
            vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
            if vText and vText[0]=='«' and BBB=='PSA' and verseNumberString=='1': # Psalm title
                titleText, closingChevron, remainder = vText[1:].partition( '»' )
                if closingChevron: # Leave the text untouched if the title is never closed
                    thisBook.addLine( 'd', titleText ) # Psalm title
                    vText = remainder.lstrip()

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText:
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            # NOTE(review): '<' has already been replaced above, so this test can't succeed as written
            #   and verseNumber is only used for the sequence checks below -- confirm the intent.
            if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                else:
                    logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there isn't one if the file had no usable lines)
    if thisBook is not None: self.saveBook( thisBook )
    self.doPostLoadProcessing()
# end of load
def load( self ): """ Load a single source file and load book elements. """ if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) ) def decodeVerse( encodedVerseString ): """ Decodes the verse which has @ format codes. """ verseString = encodedVerseString if verseString.startswith( '@@' ): # This simply means that encoding follows verseString = verseString[2:] if verseString.startswith( '@@' ): # This simply means that encoding follows verseString = verseString[2:] # Paragraph markers (marked now with double backslash) verseString = verseString.replace( '@^', '\\\\p ' ) verseString = verseString.replace( '@0', '\\\\m ' ) verseString = verseString.replace( '@1', '\\\\q1 ' ).replace( '@2', '\\\\q2 ' ).replace( '@3', '\\\\q3 ' ).replace( '@4', '\\q4 ' ) verseString = verseString.replace( '@8', '\\\\m ' ) # Character markers (marked now with single backslash) verseString = verseString.replace( '@6', '\\wj ' ).replace( '@5', '\\wj*' ) verseString = verseString.replace( '@9', '\\add ' ).replace( '@7', '\\add*' ) # or \\i ??? 
verseString = re.sub( r'@<f([0-9])@>@/', r'\\ff\1', verseString ) verseString = re.sub( r'@<x([0-9])@>@/', r'\\xx\1', verseString ) #print( repr( verseString ) ) assert( '@' not in verseString ) return verseString # end of decodeVerse # Read all the lines into bookDict lastLine, lineCount = '', 0 bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(), OrderedDict(), {}, {}, {} BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = '' lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done for line in myFile: lineCount += 1 #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF #logging.info( " YETBible.load: Detected UTF-16 Byte Order Marker" ) #line = line[1:] # Remove the UTF-8 Byte Order Marker if line[-1]=='\n': line=line[:-1] # Removing trailing newline character if not line: continue # Just discard blank lines lastLine = line #print ( 'YETBible file line is "' + line + '"' ) bits = line.split( '\t' ) #print( self.givenName, BBB, bits ) if bits[0] == 'info': assert( len(bits) == 3 ) if bits[1] == 'shortName': shortName = bits[2] self.name = shortName elif bits[1] == 'longName': longName = bits[2] elif bits[1] == 'description': description = bits[2] elif bits[1] == 'locale': locale = bits[2] assert( 2 <= len(locale) <= 3 ) if locale == 'in': locale = 'id' # Fix a quirk in the locale encoding else: logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \ .format( repr(bits[1]), BBB, bookCode, chapterNumberString, verseNumberString ) ) continue elif bits[0] == 'book_name': assert( 3 <= len(bits) <= 4 ) thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bits[1] ) if len(bits) == 3: bookNameDict[thisBBB] = bits[2], '' elif len(bits) == 4: bookNameDict[thisBBB] = bits[2], bits[3] continue elif bits[0] == 'verse': assert( 
len(bits) == 5 ) bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:] if Globals.debugFlag: assert( bookNumberString.isdigit() ) assert( chapterNumberString.isdigit() ) assert( verseNumberString.isdigit() ) BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString ) #print( "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) ) if BBB != lastBBB: # We have a new book if lastBBB is not None: # We have a completed book to save bookDict[lastBBB] = bookLines assert( BBB in bookNameDict ) bookLines = OrderedDict() # Keys are (C,V) strings verseString = decodeVerse( encodedVerseString ) bookLines[(chapterNumberString,verseNumberString)] = verseString # Just store it for now lastBBB = BBB continue elif bits[0] == 'pericope': assert( len(bits) == 5 ) bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:] if Globals.debugFlag: assert( bookNumberString.isdigit() ) assert( chapterNumberString.isdigit() ) assert( verseNumberString.isdigit() ) BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString ) headingString = encodedHeadingString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' ) #print( repr(encodedHeadingString), repr(headingString) ) assert( '@' not in headingString ) headingDict[(BBB,chapterNumberString,verseNumberString)] = headingString, [] # Blank refList continue elif bits[0] == 'parallel': # These lines optionally follow pericope lines assert( len(bits) == 2 ) heading, refList = headingDict[(BBB,chapterNumberString,verseNumberString)] refList.append( bits[1] ) #print( "parallel2", repr(heading), refList ) headingDict[(BBB,chapterNumberString,verseNumberString)] = heading, refList continue elif bits[0] == 'xref': assert( len(bits) == 6 ) bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:] if Globals.debugFlag: assert( bookNumberString.isdigit() ) assert( 
chapterNumberString.isdigit() ) assert( verseNumberString.isdigit() ) assert( indexNumberString.isdigit() ) BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString ) noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' ) noteString = re.sub( r'@<ta(.+?)@>', r'', noteString ) # Get rid of these encoded BCV references for now noteString = re.sub( r'@<to(.+?)@>', r'', noteString ) # Get rid of these OSIS BCV references for now noteString = noteString.replace( '@/', '' ) #print( repr(encodedNoteString), repr(noteString) ) assert( '@' not in noteString ) xrefDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString continue elif bits[0] == 'footnote': assert( len(bits) == 6 ) bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:] if Globals.debugFlag: assert( bookNumberString.isdigit() ) assert( chapterNumberString.isdigit() ) assert( verseNumberString.isdigit() ) assert( indexNumberString.isdigit() ) BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString ) noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' ) assert( '@' not in noteString ) footnoteDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString continue else: print( "YETBible: Unknown line type", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits ); halt bookDict[lastBBB] = bookLines # Save the last book #if bookCode != lastBookCode: # We've started a new book #if lastBookCode != -1: # Better save the last book #self.saveBook( thisBook ) #BBB = Globals.BibleBooksCodes.getBBBFromYETBibleCode( bookCode ) #thisBook = BibleBook( self.name, BBB ) #thisBook.objectNameString = "YET Bible Book object" #thisBook.objectTypeString = "YET" #lastBookCode = bookCode #lastChapterNumber = lastVerseNumber = -1 #if chapterNumber != lastChapterNumber: # We've started a new chapter #if Globals.debugFlag: 
assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception #if chapterNumber == 0: #logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) ) #thisBook.appendLine( 'c', chapterNumberString ) #lastChapterNumber = chapterNumber #lastVerseNumber = -1 ## Handle the verse info #if verseNumber==lastVerseNumber and vText==lastVText: #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) ) #continue #if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english': ## Move Psalm titles to verse zero #verseNumber = 0 #if verseNumber < lastVerseNumber: #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) ) #elif verseNumber == lastVerseNumber: #if vText == lastVText: #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) ) #else: #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) ) #thisBook.appendLine( 'v', verseNumberString + ' ' + vText ) #lastVText = vText #lastVerseNumber = verseNumber # Now process the books for BBB,bkData in bookDict.items(): #print( "Processing", BBB ) thisBook = BibleBook( self.name, BBB ) thisBook.objectNameString = "YET Bible Book object" thisBook.objectTypeString = "YET" lastChapterNumberString = None for (chapterNumberString,verseNumberString), verseString in bkData.items(): # Insert headings (can only occur before verses) if (BBB,chapterNumberString,verseNumberString) in headingDict: heading, refList = 
headingDict[(BBB,chapterNumberString,verseNumberString)] #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList ) thisBook.appendLine( 's', heading ) if refList: refString = "" #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList ) for ref in refList: refString += ('; ' if refString else '') + ref #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) ) thisBook.appendLine( 'r', '('+refString+')' ) # Insert footnotes and cross-references while( '\\ff' in verseString ): #print( "footnote", repr(verseString) ) fIx = verseString.index( '\\ff' ) caller = verseString[fIx+3] #print( "fcaller", repr(caller) ) assert( caller.isdigit() ) note = footnoteDict[(BBB,chapterNumberString,verseNumberString,caller)] #print( "fnote", repr(note) ) verseString = verseString[:fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[fIx+4:] #print( "fvS", repr(verseString) ) while( '\\xx' in verseString ): #print( "xref", repr(verseString) ) fIx = verseString.index( '\\xx' ) caller = verseString[fIx+3] #print( "xcaller", repr(caller) ) assert( caller.isdigit() ) note = xrefDict[(BBB,chapterNumberString,verseNumberString,caller)] #print( "xnote", repr(note) ) verseString = verseString[:fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[fIx+4:] #print( "xvS", repr(verseString) ) # Save the Bible data fields if chapterNumberString != lastChapterNumberString: thisBook.appendLine( 'c', chapterNumberString ) lastChapterNumberString = chapterNumberString #print( BBB, chapterNumberString, verseNumberString, repr(verseString) ) if verseString.startswith( '\\\\' ): # It's an initial paragraph marker if verseString[3]==' ': marker, verseString = verseString[2], verseString[4:] elif verseString[4]==' ': marker, verseString = verseString[2:4], verseString[5:] else: halt #print( '', '\\'+marker ) thisBook.appendLine( marker, '' ) assert( not verseString.startswith( '\\\\' ) ) bits = verseString.split( '\\\\' ) 
# Split on paragraph markers (but not character markers) for j,bit in enumerate(bits): #print( "loop", j, repr(bit), repr(verseString) ) if j==0: thisBook.appendLine( 'v', verseNumberString + ' ' + verseString.rstrip() ) else: if bit[1]==' ': marker, bit = bit[0], bit[2:] elif bit[2]==' ': marker, bit = bit[0:2], bit[3:] else: halt #print( "mV", marker, repr(bit), repr(verseString) ) thisBook.appendLine( marker, bit.rstrip() ) self.saveBook( thisBook ) self.doPostLoadProcessing()
def load(self):
    """
    Load all the books out of the SQLite3 database.

    The 'OT' and 'NT' flags in self.suppliedMetadata['MySword'] decide which
    book to start at and how many books to attempt.  Each expected verse is
    fetched individually through self.cursor and passed to handleRTFLine()
    together with the BibleBook being built.  A book is stashed (via
    self.stashBook) only if at least one non-blank text line was found in it.

    If neither testament flag is set, a critical message is logged and the
    method returns without loading anything (this case used to fail with an
    UnboundLocalError).

    If a non-string verse is found and the metadata has an 'encryption'
    entry, loading is abandoned at that point and the book in progress is
    not stashed.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(exp("load()"))
    assert self.preloadDone

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    # Decide where to start and how many books to attempt
    metadata = self.suppliedMetadata['MySword']
    if metadata['OT'] and metadata['NT']:
        BBB, booksExpected = 'GEN', 66
    elif metadata['OT']:
        BBB, booksExpected = 'GEN', 39
    elif metadata['NT']:
        BBB, booksExpected = 'MAT', 27
    else:
        logging.critical(
            "MySwordBible.load: Neither OT nor NT is flagged in the metadata for {}"
            .format(self.sourceFilepath))
        return

    # Create the first book
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'MySword Bible Book object'
    thisBook.objectTypeString = 'MySword'

    verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
    numC, numV = len(verseList), verseList[0]
    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
    C = V = 1

    bookCount = 0
    ourGlobals = {'haveParagraph': False}  # State shared with handleRTFLine
    haveLines = False
    while True:
        self.cursor.execute(
            'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
            (nBBB, C, V))
        row = self.cursor.fetchone()
        line = None if row is None else row[0]  # row is None if the reference is missing

        if line is None:
            logging.warning(
                "MySwordBible.load: Have missing verse line at {} {}:{}"
                .format(BBB, C, V))
        elif not isinstance(line, str):
            if 'encryption' in metadata:
                logging.critical(
                    "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                    .format(BBB, C, V, line))
                break  # No point continuing with an encrypted module
            logging.critical(
                "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                .format(BBB, C, V, line, metadata))
        elif not line:
            logging.warning(
                "MySwordBible.load: Found blank verse line at {} {}:{}"
                .format(BBB, C, V))
        else:
            haveLines = True

            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            line = line.rstrip('\r\n')
            if '\r' in line or '\n' in line:  # (in the middle)
                logging.warning(
                    "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                    .format(BBB, C, V))
                line = line.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

        # NOTE(review): the line is passed on even when it is None, blank or
        #   not a str -- confirm that handleRTFLine copes with those values.
        handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

        V += 1
        if V > numV:
            C += 1
            if C > numC:  # Save this book now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print(" MySword saving", BBB, bookCount + 1)
                    self.stashBook(thisBook)
                bookCount += 1  # Not the number saved but the number we attempted to process
                if bookCount >= booksExpected:
                    break
                BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)

                # Create the next book
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'MySword Bible Book object'
                thisBook.objectTypeString = 'MySword'
                haveLines = False

                verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
                numC, numV = len(verseList), verseList[0]
                nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
                C = V = 1
            else:  # next chapter only
                numV = verseList[C - 1]
                V = 1

        if ourGlobals['haveParagraph']:
            thisBook.addLine('p', '')
            ourGlobals['haveParagraph'] = False

    self.cursor.close()
    del self.cursor
    self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def loadBook( self, BBB, filename, encoding='utf-8' ):
    """
    Read one MorphGNT text file into a new BibleBook and stash it.

    The file is opened from self.sourceFilepath/filename.  Every line must
    split (on whitespace) into exactly seven fields:
        0 book/chapter/verse    1 part of speech (POS)    2 parsing code
        3 text (including punctuation)    4 word (with punctuation stripped)
        5 normalized word    6 lemma
    e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος

    A 'c' line is added whenever the chapter field changes, a 'v' line
    whenever the verse field changes, and a 'vw' plus a 'g' line for every
    word.  Malformed lines fail an assert.
    """

    def stripOneLeadingZero( digits ):
        # Drops a single leading zero only
        return digits[1:] if digits[0] == '0' else digits

    def unpackLine( line ):
        """ Validate one line, returning (b,c,v), (POS,parsing), (4 word forms). """
        fields = line.split()
        assert len(fields) == 7
        reference, POSCode, parsingCode = fields[0], fields[1], fields[2]
        bn = stripOneLeadingZero( reference[0:2] )
        cn = stripOneLeadingZero( reference[2:4] )
        vn = stripOneLeadingZero( reference[4:6] )

        assert len(POSCode) == 2
        assert POSCode in Greek.POSCodes.keys()

        assert len(parsingCode) == 8
        for position, character in enumerate( parsingCode ):
            assert character in Greek.parsingCodes[position]
        # Then check each position again against its own named code table
        namedTables = ( Greek.personCodes, Greek.tenseCodes, Greek.voiceCodes, Greek.modeCodes,
                        Greek.caseCodes, Greek.numberCodes, Greek.genderCodes, Greek.degreeCodes )
        for position, codeTable in enumerate( namedTables ):
            assert parsingCode[position] in codeTable

        return (bn,cn,vn), (POSCode,parsingCode), tuple( fields[3:7] )
    # end of unpackLine

    book = BibleBook( self, BBB )
    book.objectNameString = 'Morph Greek NT Bible Book object'
    book.objectTypeString = 'MorphGNT'
    self.thisBook = book

    filepath = os.path.join( self.sourceFilepath, filename )
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Loading {}…".format( filename ) )

    previousChapter = previousVerse = None
    isUTF8 = encoding.lower() == 'utf-8'
    with open( filepath, encoding=encoding ) as sourceFile: # Closed automatically
        for lineNumber, line in enumerate( sourceFile, start=1 ):
            if lineNumber == 1 and isUTF8 and line and line[0] == chr(65279): #U+FEFF
                logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                line = line[1:] # Drop the BOM
            if line and line[-1] == '\n':
                line = line[:-1] # Drop the trailing newline

            (_bookField, chapter, verse), (POSCode, parsingCode), wordForms = unpackLine( line )
            if chapter != previousChapter:
                self.thisBook.addLine( 'c', chapter )
                previousChapter, previousVerse = chapter, None
            if verse != previousVerse:
                self.thisBook.addLine( 'v', verse )
                previousVerse = verse
            self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( *wordForms ) )
            self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) )

    if self.thisBook:
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( " {} words loaded from {}".format( len(self.thisBook), filename ) )
        self.stashBook( self.thisBook )
def loadBook(self, BBB):
    """
    Fetch a single book, verse by verse, from the SQLite3 database.

    Returns immediately if BBB is already in self.books or if a load of BBB
    has been attempted before (tracked in self.triedLoadingBook).  Otherwise
    every expected chapter/verse (per self.BibleOrganisationalSystem) is
    queried through self.cursor and handed to handleRTFLine().  The book is
    stashed with self.stashBook() only if at least one non-blank text line
    was found.  A non-string verse in a module whose metadata has an
    'encryption' entry aborts the load without stashing.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(exp("loadBook( {} )").format(BBB))
    assert self.preloadDone

    if BBB in self.books:
        if BibleOrgSysGlobals.debugFlag:
            print(" {} is already loaded -- returning".format(BBB))
        return
    if BBB in self.triedLoadingBook:
        logging.warning(
            "We had already tried loading MySwordBible {} for {}".format(
                BBB, self.name))
        return
    self.triedLoadingBook[BBB] = True
    self.bookNeedsReloading[BBB] = False

    if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
        print(_("MySwordBible: Loading {} from {}…").format(
            BBB, self.sourceFilepath))

    # Set up the book that we're going to fill
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'MySword Bible Book object'
    thisBook.objectTypeString = 'MySword'

    verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
    chapterCount = len(verseList)
    versesInChapter = verseList[0]
    bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)

    ourGlobals = {'haveParagraph': False}  # State shared with handleRTFLine
    haveLines = False
    chapter = verse = 1
    while True:
        self.cursor.execute(
            'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
            (bookNumber, chapter, verse))
        record = self.cursor.fetchone()
        text = None if record is None else record[0]  # No record means the reference is missing

        if text is None:
            logging.warning(
                "MySwordBible.load: Have missing verse line at {} {}:{}"
                .format(BBB, chapter, verse))
        elif not isinstance(text, str):
            moduleMetadata = self.suppliedMetadata['MySword']
            if 'encryption' in moduleMetadata:
                logging.critical(
                    "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                    .format(BBB, chapter, verse, text))
                break
            logging.critical(
                "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                .format(BBB, chapter, verse, text, moduleMetadata))
        elif not text:
            logging.warning(
                "MySwordBible.load: Found blank verse line at {} {}:{}"
                .format(BBB, chapter, verse))
        else:
            haveLines = True
            # Trailing CR/LF characters are dropped; embedded ones become spaces
            text = text.rstrip('\r\n')
            if '\r' in text or '\n' in text:
                logging.warning(
                    "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                    .format(BBB, chapter, verse))
                text = text.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

        handleRTFLine(self.name, BBB, chapter, verse, text, thisBook, ourGlobals)

        # Step on to the next reference
        verse += 1
        if verse > versesInChapter:
            chapter += 1
            if chapter > chapterCount:  # Finished -- save this book now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print(" MySword saving", BBB)
                    self.stashBook(thisBook)
                break
            versesInChapter = verseList[chapter - 1]
            verse = 1

        if ourGlobals['haveParagraph']:
            thisBook.addLine('p', '')
            ourGlobals['haveParagraph'] = False
def loadBook( self, BBB, filename, encoding='utf-8' ):
    """
    Load one MorphGNT text file into a new BibleBook and save it.

    The file is read from self.sourceFilepath/filename.  Each line is
    expected to have seven whitespace-separated parts (see unpackLine).
    A 'c' line is appended when the chapter field changes, a 'v' line when
    the verse field changes, and a 'vw' and a 'g' line for every word.

    A leading U+FEFF byte order marker on the first line is removed (when
    the encoding is utf-8).  Lines that don't match the expected layout
    fail an assert in unpackLine.
    """

    def unpackLine( line ):
        """
        Split and validate one line.

        Should be seven parts in the line
            0 book/chapter/verse
            1 part of speech (POS)
            2 parsing code
            3 text (including punctuation)
            4 word (with punctuation stripped)
            5 normalized word
            6 lemma
        e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
              180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
              180102 P- -------- κατ’ κατ’ κατά κατά
              180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία

        Returns (bn,cn,vn), (POSCode,parsingCode), (text,word,normalized,lemma).
        """
        bits = line.split()
        assert len(bits) == 7
        bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
        if bn[0]=='0': bn = bn[1:] # Remove any leading zero
        if cn[0]=='0': cn = cn[1:] # Remove any leading zero
        if vn[0]=='0': vn = vn[1:] # Remove any leading zero
        POSCode = bits[1]
        assert len(POSCode) == 2
        assert POSCode in Greek.POSCodes.keys()
        parsingCode = bits[2]
        assert len(parsingCode) == 8
        for j,char in enumerate(parsingCode):
            assert char in Greek.parsingCodes[j]
        assert parsingCode[0] in Greek.personCodes
        assert parsingCode[1] in Greek.tenseCodes
        assert parsingCode[2] in Greek.voiceCodes
        assert parsingCode[3] in Greek.modeCodes
        assert parsingCode[4] in Greek.caseCodes
        assert parsingCode[5] in Greek.numberCodes
        assert parsingCode[6] in Greek.genderCodes
        assert parsingCode[7] in Greek.degreeCodes
        return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
    # end of unpackLine

    self.thisBook = BibleBook( self.name, BBB )
    self.thisBook.objectNameString = "Morph Greek NT Bible Book object"
    self.thisBook.objectTypeString = "MorphGNT"
    filepath = os.path.join( self.sourceFilepath, filename )
    if Globals.verbosityLevel > 2: print( " Loading {}...".format( filename ) )
    lineCount = 0
    lastC = lastV = None
    with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
            # The emptiness check matters: the line can be empty once a BOM has been removed
            if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character

            ref, grammar, words = unpackLine( line )
            _bn, cn, vn = ref
            POSCode, parsingCode = grammar
            word1, word2, word3, word4 = words
            if cn != lastC:
                self.thisBook.appendLine( 'c', cn )
                lastC, lastV = cn, None
            if vn != lastV:
                self.thisBook.appendLine( 'v', vn )
                lastV = vn
            self.thisBook.appendLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
            self.thisBook.appendLine( 'g', "{}/{}".format( POSCode, parsingCode ) )

    if self.thisBook:
        if Globals.verbosityLevel > 3: print( " {} words loaded from {}".format( len(self.thisBook), filename ) )
        self.saveBook( self.thisBook )
def load(self):
    """
    Load a single source file and load book elements.

    Opens self.sourceFilepath as an SQLite3 database, copies the first row
    of the 'Details' table into self.settingsDict (also setting self.name
    from a 'Description' shorter than 40 characters and self.abbreviation
    from 'Abbreviation'), then fetches each expected verse from the 'Bible'
    table and hands it to handleLine() along with the BibleBook being
    built.  Books with at least one non-blank text line are saved with
    self.saveBook().

    The database connection is always closed again, even if an exception
    occurs part-way through.  If the 'Details' table is empty, or neither
    the 'OT' nor the 'NT' setting is set, a critical message is logged and
    nothing is loaded.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    # These checks only log -- we still try to open the file
    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical("{} doesn't appear to be a MySword file".format(
            self.sourceFilename))
    elif not self.sourceFilename.upper().endswith(
            BibleFilenameEndingsToAccept[0]):
        logging.critical(
            "{} doesn't appear to be a MySword Bible file".format(
                self.sourceFilename))

    connection = sqlite3.connect(self.sourceFilepath)
    try:
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute('select * from Details')
        row = cursor.fetchone()
        if row is None:  # fetchone() returns None when there are no rows
            logging.critical("MySwordBible.load: No Details found in {}".format(
                self.sourceFilename))
            return
        for key in row.keys():
            self.settingsDict[key] = row[key]
        if 'Description' in self.settingsDict and len(
                self.settingsDict['Description']) < 40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, self.settingsDict['encryption']))

        # Decide where to start and how many books to attempt
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            BBB, booksExpected = 'GEN', 66
        elif self.settingsDict['OT']:
            BBB, booksExpected = 'GEN', 39
        elif self.settingsDict['NT']:
            BBB, booksExpected = 'MAT', 27
        else:
            logging.critical(
                "MySwordBible.load: Neither OT nor NT is flagged in {}".format(
                    self.sourceFilename))
            return

        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {'haveParagraph': False}  # State shared with handleLine
        haveLines = False
        while True:
            cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            row = cursor.fetchone()
            line = None if row is None else row[0]  # row is None if the reference is missing

            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}"
                    .format(BBB, C, V))
            elif not isinstance(line, str):
                if 'encryption' in self.settingsDict:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                        .format(BBB, C, V, repr(line)))
                    break  # No point continuing with an encrypted module
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                    .format(BBB, C, V, repr(line), self.settingsDict))
            elif not line:
                logging.warning(
                    "MySwordBible.load: Found blank verse line at {} {}:{}"
                    .format(BBB, C, V))
            else:
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (We just ignore these for now)
                line = line.rstrip('\r\n')
                if '\r' in line or '\n' in line:  # (in the middle)
                    logging.warning(
                        "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                        .format(BBB, C, V))
                    line = line.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

            # NOTE(review): the line is passed on even when it is None, blank
            #   or not a str -- confirm that handleLine copes with those values.
            handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("Saving", BBB, bookCount + 1)
                        self.saveBook(thisBook)
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected:
                        break
                    BBB = BOS.getNextBookCode(BBB)

                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList(BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
                    C = V = 1
                else:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        cursor.close()
    finally:
        connection.close()  # Also releases the cursor if we left early
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    The file (self.sourceFilepath, read with self.encoding) is processed
    line by line.  Blank lines and lines starting with '#' are skipped.
    The remaining lines come in sections:
        first line      expected to be '*Bible' (anything else is only warned about)
        until '*Chapter'  version details:  shortName|fullName|language
        until '*Context'  book details:     bookCode|fullName|shortName|numChapters
        thereafter        verse data:       bookCode|chapter|verse|lineMark|text
    self.name is set from the version's full name.  For verse data a new
    BibleBook is started whenever the book code changes, a 'c' line is added
    whenever the chapter changes, and a 'v' line is added for every verse
    ('<' and '>' in the text become '\\it ' and '\\it*').

    Completed books are stashed with self.stashBook().  If the file has no
    verse lines at all, no book is stashed.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    status = 0  # 0 = getting version details, 1 = getting book details, 2 = getting verse data
    lineCount = 0
    BBB = lastBBB = None
    thisBook = None
    lastChapterNumberString = None
    bookDetails = {}  # Collected per BBB but not used further in this method
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if lineCount == 1:
                if line.startswith(chr(65279)):  #U+FEFF
                    logging.info(
                        "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                elif line.startswith('\xef\xbb\xbf'):  # 0xEF,0xBB,0xBF
                    # The three UTF-8 BOM bytes as they appear if the file
                    #   was decoded with a single-byte encoding
                    logging.info(
                        "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(self.sourceFilepath))
                    line = line[3:]  # Remove the UTF-8 Unicode Byte Order Marker (BOM)
            if line and line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == '#':
                continue  # Just discard comment lines

            if lineCount == 1:
                if line != '*Bible':
                    logging.warning(
                        "Unknown DrupalBible first line: {}".format(repr(line)))

            elif status == 0:
                if line == '*Chapter':
                    status = 1
                else:  # Get the version name details
                    shortName, fullName, language = line.split('|')
                    self.name = fullName

            elif status == 1:
                if line == '*Context':
                    status = 2
                else:  # Get the book name details
                    bookCode, bookFullName, bookShortName, numChapters = line.split('|')
                    assert bookShortName == bookCode
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                    # Result can be string or list of strings (best guess first)
                    BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]
                    bookDetails[BBB] = bookFullName, bookShortName, numChapters

            elif status == 2:  # Get the verse text
                bookCode, chapterNumberString, verseNumberString, lineMark, verseText = line.split('|')
                if lineMark:
                    print(repr(lineMark))
                    # NOTE(review): 'halt' is not defined in the visible code --
                    #   presumably a deliberate crash for an unhandled case; confirm.
                    halt
                BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(bookCode)
                # Result can be string or list of strings (best guess first)
                BBB = BBBresult if isinstance(BBBresult, str) else BBBresult[0]

                if BBB != lastBBB:  # We've started a new book
                    if lastBBB is not None:
                        self.stashBook(thisBook)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'DrupalBible Bible Book object'
                    thisBook.objectTypeString = 'DrupalBible'
                    lastChapterNumberString = None
                    lastBBB = BBB

                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString

                verseText = verseText.replace('<', '\\it ').replace('>', '\\it*')
                thisBook.addLine('v', verseNumberString + ' ' + verseText)

            else:
                halt  # Unknown status -- see the note about 'halt' above

    # Save the final book (there isn't one if the file had no verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.doPostLoadProcessing()
def loadBook( self, bookElement ):
    """
    Load the book container from the XML data file.

    bookElement must be a 'book' element; its 'id' attribute is converted
    to a BBB code with getBBBFromUSFM().  A new BibleBook is created as
    self.thisBook, and each child element is converted according to its tag:
        id, ide, h, toc, c, s, b, cl, cp   become lines added with addLine()
        p, q, d                            are passed to self.loadParagraph()
                                             (which returns the current verse)
        table                              is passed to self.loadTable()
        ve                                 is checked and then ignored
    Anything else is logged as critical.  Unexpected attributes are logged
    as warnings.  Finally the book is saved with self.saveBook().
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
    assert( bookElement.tag == 'book' )
    mainLocation = self.name + " USFX book"

    # Process the attributes first
    bookCode = None
    for attrib,value in bookElement.items():
        if attrib == 'id':
            bookCode = value
        else:
            logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( bookCode )
    mainLocation = "{} USFX {} book".format( self.name, BBB )
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
    BibleOrgSysGlobals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
    BibleOrgSysGlobals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

    # Now create our actual book
    self.thisBook = BibleBook( self, BBB )
    self.thisBook.objectNameString = "USFX XML Bible Book object"
    self.thisBook.objectTypeString = "USFX"

    C = V = '0'
    for element in bookElement:
        # There must be one placeholder per argument here
        #   (str.format silently ignores any extra positional arguments)
        location = "{} of {} {} {}:{}".format( element.tag, mainLocation, BBB, C, V )

        if element.tag == 'id':
            idText = clean( element.text )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'vsg3' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ksq2' )
            for attrib,value in element.items():
                if attrib == 'id':
                    assert( value == bookCode )
                else:
                    logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.addLine( 'id', bookCode + ((' '+idText) if idText else '') )

        elif element.tag == 'ide':
            ideText = clean( element.text )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'jsa0' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ls01' )
            charset = None
            for attrib,value in element.items():
                if attrib == 'charset':
                    charset = value
                else:
                    logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.addLine( 'ide', charset + ((' '+ideText) if ideText else '') )

        elif element.tag == 'h':
            hText = element.text
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'dj35' )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'hs35' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'hs32' )
            self.thisBook.addLine( 'h', clean(hText) )

        elif element.tag == 'toc':
            tocText = element.text
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ss13' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js13' )
            level = None
            for attrib,value in element.items():
                if attrib == 'level': # Seems compulsory
                    level = value
                else:
                    logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.addLine( 'toc'+level, clean(tocText) )

        elif element.tag == 'c':
            BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
            # This is a milestone
            for attrib,value in element.items():
                if attrib == 'id':
                    C, V = value, '0'
                else:
                    logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            self.thisBook.addLine( 'c', C )

        elif element.tag == 's':
            sText = clean( element.text )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'wxg0' )
            level = None
            for attrib,value in element.items():
                if attrib == 'level': # Seems optional
                    level = value
                else:
                    logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            marker = 's'
            if level: marker += level
            self.thisBook.addLine( marker, sText )
            for subelement in element:
                sublocation = subelement.tag + " of " + location
                if subelement.tag == 'f':
                    self.loadFootnote( subelement, sublocation, BBB, C, V )
                elif subelement.tag == 'x':
                    self.loadCrossreference( subelement, sublocation )
                elif subelement.tag == 'fig':
                    self.loadFigure( subelement, sublocation )
                elif subelement.tag == 'table':
                    self.loadTable( subelement, sublocation )
                elif subelement.tag in ('add','it','bd','bdit','sc',):
                    self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                elif subelement.tag == 'optionalLineBreak':
                    print( "What is loadBook optionalLineBreak?" )
                else:
                    logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )

        elif element.tag in ('p','q','d',):
            V = self.loadParagraph( element, location, BBB, C )

        elif element.tag == 'b':
            BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'nd04' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
            self.thisBook.addLine( 'b', '' )

        elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
            marker, text = element.tag, clean(element.text)
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'od01' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'gd92' )
            idField = None
            for attrib,value in element.items():
                if attrib == 'id':
                    idField = value
                else:
                    logging.warning( _("dv35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            if idField:
                if text is None: # Fall back to the id attribute for the text
                    text = idField
                else: # Only warn when there really is an id that we didn't use
                    logging.warning( _("dve4 Unprocessed idField ({}) in {}").format( idField, location ) )
            if text is None:
                logging.critical( "Why is {} empty at {}".format( marker, location ) )
            assert( text is not None )
            self.thisBook.addLine( marker, text )

        elif element.tag == 'table':
            self.loadTable( element, location )

        elif element.tag == 've':
            # What's this in Psalms: <c id="4" /><ve /><d>For the Chief Musician; on stringed instruments. A Psalm of David.</d>
            BibleOrgSysGlobals.checkXMLNoText( element, location, 'kds3' )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ks29' )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'kj24' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js91' )
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Ignoring 've' field", BBB, C, V )

        else:
            logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
            # NOTE(review): 'halt' is not defined in the visible code --
            #   presumably a deliberate crash while debugging; confirm.
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

    self.saveBook( self.thisBook )
def load(self):
    """
    Load a single CSV source file and build a BibleBook for each book found.

    Each data line is split on the first three commas into
        book number, chapter number, verse number, verse text.
    If the first line of the file is a header starting with "Book", (quoted)
    or Book, (unquoted) it is discarded, and for a quoted header matching
    quote marks are stripped from each field of every following line.
    Blank lines, single-space lines and lines starting with '#' are skipped.

    RTF-style escapes in the verse text are converted to Unicode characters
    before the verse is added. Completed books are passed to self.saveBook(),
    and self.doPostLoadProcessing() is called once at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    def stripQuotes(field):
        """ Remove one pair of matching quote marks surrounding the field (if any). """
        if len(field) >= 2 and field[0] == field[-1] and field[0] in '"\'':
            return field[1:-1]
        return field

    lineCount = 0
    BBB = None
    thisBook = None  # Stays None if the file contains no verse lines
    lastBookNumber = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    quoted = None
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line == ' ':
                continue  # Special case: some files have a blank on every second line -- HACK
            if line[0] == '#':
                continue  # Just discard comment lines
            if lineCount == 1:
                if line.startswith('"Book",'):
                    quoted = True
                    continue  # Just discard header line
                elif line.startswith('Book,'):
                    quoted = False
                    continue  # Just discard header line

            bits = line.split(',', 3)
            if len(bits) != 4:
                # Skip the malformed line. (Falling through would either fail on
                #   unassigned field variables or silently reprocess the previous verse.)
                logging.error("Unexpected number of bits {} {} {} {}".format(
                    self.givenName, BBB, len(bits), bits))
                continue
            bString, chapterNumberString, verseNumberString, vText = bits

            # Remove quote marks from these strings
            if quoted:
                bString = stripQuotes(bString)
                chapterNumberString = stripQuotes(chapterNumberString)
                verseNumberString = stripQuotes(verseNumberString)
                vText = stripQuotes(vText)

            bookNumber = int(bString)
            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)

            if bookNumber != lastBookNumber:  # We've started a new book
                if lastBookNumber != -1:  # Better save the last book
                    self.saveBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(bookNumber)
                assert BBB
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "CSV Bible Book object"
                thisBook.objectTypeString = "CSV"
                lastBookNumber = bookNumber
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert (chapterNumber > lastChapterNumber or BBB == 'ESG')
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Now we have to convert any possible RTF codes to our internal codes
            # First do special characters
            vText = vText.replace('\\ldblquote', '“').replace('\\rdblquote', '”') \
                         .replace('\\lquote', '‘').replace('\\rquote', '’')
            vText = vText.replace('\\emdash', '—').replace('\\endash', '–')
            # Now do Unicode characters
            while True:  # Find patterns like \'d3 (two lowercase hex digits)
                match = re.search(r"\\'[0-9a-f][0-9a-f]", vText)
                if not match:
                    break
                i = int(vText[match.start() + 2:match.end()], 16)
                vText = vText[:match.start()] + chr(i) + vText[match.end():]
            while True:  # Find patterns like \u253? (three decimal digits)
                match = re.search(r"\\u[1-2][0-9][0-9]\?", vText)
                if not match:
                    break
                i = int(vText[match.start() + 2:match.end() - 1])
                vText = vText[:match.start()] + chr(i) + vText[match.end():]

            # Handle the verse info
            #   (The book field is reported via bString -- this method has no bookCode variable.)
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bString, chapterNumberString, verseNumberString))
                continue
            if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') \
                    and self.givenName == 'basic_english':
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB, bString,
                        chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bString,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bString,
                            chapterNumberString, verseNumberString))
            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there won't be one if the file had no verse lines)
    if thisBook is not None:
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def load( self ):
    """
    Load all the books out of the SQLite3 database.

    Steps through every book/chapter/verse reference given by the verse
    counts from self.BOS, fetches the matching Scripture row, and passes
    the text (or None if the row is missing) to handleLine(). A book is
    stashed only if at least one non-blank text line was found for it.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("load()") )
    assert self.preloadDone

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Loading {}…").format( self.sourceFilepath ) )

    # Work out the starting book and how many books to attempt
    hasOT = self.suppliedMetadata['MySword']['OT']
    hasNT = self.suppliedMetadata['MySword']['NT']
    if hasOT and hasNT:
        BBB, booksExpected = 'GEN', 66
    elif hasOT:
        BBB, booksExpected = 'GEN', 39
    elif hasNT:
        BBB, booksExpected = 'MAT', 27

    def startBook( bookCode ):
        """ Create an empty book object and look up its verse counts and reference number. """
        newBook = BibleBook( self, bookCode )
        newBook.objectNameString = 'MySword Bible Book object'
        newBook.objectTypeString = 'MySword'
        chapterVerseCounts = self.BOS.getNumVersesList( bookCode )
        referenceNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( bookCode )
        return newBook, chapterVerseCounts, referenceNumber

    # Create the first book
    thisBook, verseList, nBBB = startBook( BBB )
    numC, numV = len(verseList), verseList[0]
    C = V = 1

    bookCount = 0
    ourGlobals = { 'haveParagraph': False }
    haveLines = False
    while True:
        self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
        try:
            row = self.cursor.fetchone()
            line = row[0]
        except TypeError: # This reference is missing (row is None)
            line = None

        if line is None:
            logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
        elif not isinstance( line, str ):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                break
            logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
        elif not line:
            logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
        else:
            haveLines = True
            # Some modules end lines with \r\n or have it in the middle!
            line = line.rstrip( '\r\n' )
            if '\r' in line or '\n' in line: # (in the middle)
                logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

        handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

        # Advance to the next verse / chapter / book
        V += 1
        if V > numV:
            C += 1
            if C <= numC: # next chapter only
                numV = verseList[C-1]
                V = 1
            else: # Finished this book -- save it now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print( " MySword saving", BBB, bookCount+1 )
                    self.stashBook( thisBook )
                bookCount += 1 # Not the number saved but the number we attempted to process
                if bookCount >= booksExpected:
                    break
                BBB = self.BOS.getNextBookCode( BBB )
                # Create the next book
                thisBook, verseList, nBBB = startBook( BBB )
                haveLines = False
                numC, numV = len(verseList), verseList[0]
                C = V = 1

        if ourGlobals['haveParagraph']:
            thisBook.addLine( 'p', '' )
            ourGlobals['haveParagraph'] = False

    self.cursor.close()
    self.applySuppliedMetadata( 'MySword' ) # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def load(self):
    """
    Load a single YET source file and build a BibleBook for each book found.

    The file is read as tab-separated lines whose first field gives the line
    type: info, book_name, verse, pericope, parallel, xref or footnote.
    All lines are first collected into dictionaries, then each book is
    assembled: section headings (with any parallel references) are inserted
    before their verse, footnote and cross-reference callers are replaced
    by the note text, and verse text is split on paragraph markers.

    Completed books are passed to self.saveBook(), and
    self.doPostLoadProcessing() is called once at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}...").format(self.sourceFilepath))

    def decodeVerse(encodedVerseString):
        """
        Decodes the verse which has @ format codes.

        Paragraph markers are written with a double backslash and character
        markers with a single backslash, so the two can be told apart later.
        """
        verseString = encodedVerseString
        # A leading @@ simply means that encoding follows (stripped up to twice)
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        if verseString.startswith('@@'):
            verseString = verseString[2:]
        # Paragraph markers (marked now with double backslash)
        verseString = verseString.replace('@^', '\\\\p ')
        verseString = verseString.replace('@0', '\\\\m ')
        # NOTE: @4 must use the double backslash like @1..@3,
        #   otherwise q4 is never split out as a paragraph marker
        verseString = verseString.replace('@1', '\\\\q1 ').replace('@2', '\\\\q2 ') \
                                 .replace('@3', '\\\\q3 ').replace('@4', '\\\\q4 ')
        verseString = verseString.replace('@8', '\\\\m ')
        # Character markers (marked now with single backslash)
        verseString = verseString.replace('@6', '\\wj ').replace('@5', '\\wj*')
        verseString = verseString.replace('@9', '\\add ').replace('@7', '\\add*')  # or \\i ???
        # Footnote and cross-reference callers become \ffN and \xxN placeholders
        verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
        verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
        assert ('@' not in verseString)
        return verseString
    # end of decodeVerse

    getBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber

    # Read all the lines into bookDict
    bookNameDict, bookDict = OrderedDict(), OrderedDict()
    footnoteDict, xrefDict, headingDict = {}, {}, {}
    BBB = bookNumberString = chapterNumberString = verseNumberString = ''
    lastBBB = None
    bookLines = None
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            if line[-1] == '\n':
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines
            bits = line.split('\t')
            lineType = bits[0]

            if lineType == 'info':
                assert (len(bits) == 3)
                fieldName, fieldValue = bits[1], bits[2]
                if fieldName == 'shortName':
                    self.name = fieldValue
                elif fieldName in ('longName', 'description'):
                    pass  # Recognised but not currently stored
                elif fieldName == 'locale':
                    assert (2 <= len(fieldValue) <= 3)
                else:
                    # (The book field is reported via bookNumberString --
                    #   this method has no bookCode variable.)
                    logging.warning(
                        _("YETBible: unknown {} info field in {} {} {}:{}")
                        .format(repr(fieldName), BBB, bookNumberString,
                                chapterNumberString, verseNumberString))

            elif lineType == 'book_name':
                assert (3 <= len(bits) <= 4)
                thisBBB = getBBB(bits[1])
                if len(bits) == 3:
                    bookNameDict[thisBBB] = bits[2], ''
                elif len(bits) == 4:
                    bookNameDict[thisBBB] = bits[2], bits[3]

            elif lineType == 'verse':
                assert (len(bits) == 5)
                bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                if BibleOrgSysGlobals.debugFlag:
                    assert (bookNumberString.isdigit())
                    assert (chapterNumberString.isdigit())
                    assert (verseNumberString.isdigit())
                BBB = getBBB(bookNumberString)
                if BBB != lastBBB:  # We have a new book
                    if lastBBB is not None:  # We have a completed book to save
                        bookDict[lastBBB] = bookLines
                    assert (BBB in bookNameDict)
                    bookLines = OrderedDict()  # Keys are (C,V) strings
                # Just store it for now
                bookLines[(chapterNumberString, verseNumberString)] = decodeVerse(encodedVerseString)
                lastBBB = BBB

            elif lineType == 'pericope':
                assert (len(bits) == 5)
                bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                if BibleOrgSysGlobals.debugFlag:
                    assert (bookNumberString.isdigit())
                    assert (chapterNumberString.isdigit())
                    assert (verseNumberString.isdigit())
                BBB = getBBB(bookNumberString)
                headingString = encodedHeadingString.replace('@9', '\\it ').replace('@7', '\\it*')
                assert ('@' not in headingString)
                # Start with a blank refList
                headingDict[(BBB, chapterNumberString, verseNumberString)] = headingString, []

            elif lineType == 'parallel':  # These lines optionally follow pericope lines
                assert (len(bits) == 2)
                heading, refList = headingDict[(BBB, chapterNumberString, verseNumberString)]
                refList.append(bits[1])
                headingDict[(BBB, chapterNumberString, verseNumberString)] = heading, refList

            elif lineType == 'xref':
                assert (len(bits) == 6)
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                if BibleOrgSysGlobals.debugFlag:
                    assert (bookNumberString.isdigit())
                    assert (chapterNumberString.isdigit())
                    assert (verseNumberString.isdigit())
                    assert (indexNumberString.isdigit())
                BBB = getBBB(bookNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace('@7', '\\it*')
                # Get rid of these encoded BCV references for now
                noteString = re.sub(r'@<ta(.+?)@>', r'', noteString)
                # Get rid of these OSIS BCV references for now
                noteString = re.sub(r'@<to(.+?)@>', r'', noteString)
                noteString = noteString.replace('@/', '')
                assert ('@' not in noteString)
                xrefDict[(BBB, chapterNumberString, verseNumberString, indexNumberString)] = noteString

            elif lineType == 'footnote':
                assert (len(bits) == 6)
                bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                if BibleOrgSysGlobals.debugFlag:
                    assert (bookNumberString.isdigit())
                    assert (chapterNumberString.isdigit())
                    assert (verseNumberString.isdigit())
                    assert (indexNumberString.isdigit())
                BBB = getBBB(bookNumberString)
                noteString = encodedNoteString.replace('@9', '\\it ').replace('@7', '\\it*')
                assert ('@' not in noteString)
                footnoteDict[(BBB, chapterNumberString, verseNumberString, indexNumberString)] = noteString

            else:
                print("YETBible: Unknown line type", self.givenName, BBB, bookNumberString,
                      chapterNumberString, verseNumberString, len(bits), bits)
                # NOTE(review): 'halt' is not defined in this block -- presumably a
                #   deliberate way of stopping here with a NameError; confirm.
                halt

    # Save the last book (there won't be one if the file had no verse lines)
    if lastBBB is not None:
        bookDict[lastBBB] = bookLines

    # Now process the books
    for BBB, bkData in bookDict.items():
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "YET Bible Book object"
        thisBook.objectTypeString = "YET"
        lastChapterNumberString = None
        for (chapterNumberString, verseNumberString), verseString in bkData.items():
            # Insert headings (can only occur before verses)
            if (BBB, chapterNumberString, verseNumberString) in headingDict:
                heading, refList = headingDict[(BBB, chapterNumberString, verseNumberString)]
                thisBook.addLine('s', heading)
                if refList:
                    refString = ""
                    for ref in refList:
                        refString += ('; ' if refString else '') + ref
                    thisBook.addLine('r', '(' + refString + ')')

            # Insert footnotes and cross-references
            #   (the caller is the single digit following the \ff or \xx placeholder)
            while ('\\ff' in verseString):
                fIx = verseString.index('\\ff')
                caller = verseString[fIx + 3]
                assert (caller.isdigit())
                note = footnoteDict[(BBB, chapterNumberString, verseNumberString, caller)]
                verseString = verseString[:fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[fIx + 4:]
            while ('\\xx' in verseString):
                fIx = verseString.index('\\xx')
                caller = verseString[fIx + 3]
                assert (caller.isdigit())
                note = xrefDict[(BBB, chapterNumberString, verseNumberString, caller)]
                verseString = verseString[:fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[fIx + 4:]

            # Save the Bible data fields
            if chapterNumberString != lastChapterNumberString:
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumberString = chapterNumberString
            if verseString.startswith('\\\\'):  # It's an initial paragraph marker
                if verseString[3] == ' ':  # one-character marker
                    marker, verseString = verseString[2], verseString[4:]
                elif verseString[4] == ' ':  # two-character marker
                    marker, verseString = verseString[2:4], verseString[5:]
                else:
                    halt  # NOTE(review): undefined name, presumably a deliberate stop -- confirm
                thisBook.addLine(marker, '')
            assert (not verseString.startswith('\\\\'))
            # Split on paragraph markers (but not character markers)
            bits = verseString.split('\\\\')
            for j, bit in enumerate(bits):
                if j == 0:
                    # Only the text before the first paragraph marker belongs on the verse
                    #   line -- the remaining segments get their own lines below
                    thisBook.addLine('v', verseNumberString + ' ' + bit.rstrip())
                else:
                    if bit[1] == ' ':  # one-character marker
                        marker, bit = bit[0], bit[2:]
                    elif bit[2] == ' ':  # two-character marker
                        marker, bit = bit[0:2], bit[3:]
                    else:
                        halt  # NOTE(review): undefined name, presumably a deliberate stop -- confirm
                    thisBook.addLine(marker, bit.rstrip())
        self.saveBook(thisBook)
    self.doPostLoadProcessing()
def loadBook( self, BBB ):
    """
    Load the requested book out of the SQLite3 database.

    Returns immediately if BBB is already in self.books or has already been
    attempted (as recorded in self.triedLoadingBook). Otherwise steps through
    every chapter/verse reference given by the verse counts from self.BOS,
    passes each fetched text (or None if the row is missing) to handleLine(),
    and stashes the book if at least one non-blank text line was found.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("loadBook( {} )").format( BBB ) )
    assert self.preloadDone

    if BBB in self.books:
        if BibleOrgSysGlobals.debugFlag:
            print( " {} is already loaded -- returning".format( BBB ) )
        return # Already loaded
    if BBB in self.triedLoadingBook:
        logging.warning( "We had already tried loading MySwordBible {} for {}".format( BBB, self.name ) )
        return # We've already attempted to load this book
    self.triedLoadingBook[BBB] = True
    self.bookNeedsReloading[BBB] = False

    if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
        print( _("MySwordBible: Loading {} from {}…").format( BBB, self.sourceFilepath ) )

    # Create the book
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'MySword Bible Book object'
    thisBook.objectTypeString = 'MySword'

    verseList = self.BOS.getNumVersesList( BBB )
    numC, numV = len(verseList), verseList[0]
    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
    C = V = 1

    ourGlobals = { 'haveParagraph': False }
    haveLines = False
    while True:
        self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
        try:
            row = self.cursor.fetchone()
            line = row[0]
        except TypeError: # This reference is missing (row is None)
            line = None

        if line is None:
            logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
        elif not isinstance( line, str ):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                break
            logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
        elif not line:
            logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
        else:
            haveLines = True
            # Some modules end lines with \r\n or have it in the middle!
            line = line.rstrip( '\r\n' )
            if '\r' in line or '\n' in line: # (in the middle)
                logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

        handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

        # Advance to the next verse / chapter
        V += 1
        if V > numV:
            C += 1
            if C > numC: # Finished the book -- save it now
                if haveLines:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print( " MySword saving", BBB )
                    self.stashBook( thisBook )
                break
            # next chapter only
            numV = verseList[C-1]
            V = 1

        if ourGlobals['haveParagraph']:
            thisBook.addLine( 'p', '' )
            ourGlobals['haveParagraph'] = False
def load(self):
    """
    Load a single Unbound Bible source file and build a BibleBook per book.

    Lines starting with '#' are treated as metadata: when they split on tab
    into exactly two fields with a non-empty second field, the pair is stored
    in self.suppliedMetadata["Unbound"]. Other lines are tab-separated verse
    records with 4, 6 or 9 fields:
        4: book code, chapter, verse, text
        6: book code, chapter, verse, subverse, sequence number, text
        9: NRSVA book code, chapter and verse, followed by the six fields above

    Completed books are passed to self.stashBook(); afterwards
    self.applySuppliedMetadata("Unbound") and self.doPostLoadProcessing()
    are called.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    self.suppliedMetadata["Unbound"] = {}

    BBB = None
    thisBook = None  # Stays None if the file contains no verse lines
    # Pre-set so that the messages about bad lines can always be formatted,
    #   even if the very first data line is the bad one
    bookCode = chapterNumberString = verseNumberString = None
    NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
    subverseNumberString = sequenceNumberString = None
    lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
    lastVText = ""
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            if line[-1] == "\n":
                line = line[:-1]  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines
            if line[0] == "#":
                hashBits = line[1:].split("\t")
                if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                    self.suppliedMetadata["Unbound"][hashBits[0]] = hashBits[1]
                continue  # Nothing more to do for comment lines

            bits = line.split("\t")
            if len(bits) == 4:
                bookCode, chapterNumberString, verseNumberString, vText = bits
            elif len(bits) == 6:
                (bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 9:
                (NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString,
                 bookCode, chapterNumberString, verseNumberString,
                 subverseNumberString, sequenceNumberString, vText) = bits
            elif len(bits) == 1 and self.givenName.startswith("lxx_a_parsing_"):
                logging.warning(
                    _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                        line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                continue
            else:
                print("Unexpected number of bits", self.givenName, BBB, bookCode,
                      chapterNumberString, verseNumberString, len(bits), bits)
                # NOTE(review): 'halt' is not defined in this block -- presumably a
                #   deliberate way of stopping here with a NameError; confirm.
                halt

            if NRSVA_bookCode:
                assert len(NRSVA_bookCode) == 3
            if NRSVA_chapterNumberString:
                assert NRSVA_chapterNumberString.isdigit()
            if NRSVA_verseNumberString:
                assert NRSVA_verseNumberString.isdigit()

            if not bookCode and not chapterNumberString and not verseNumberString:
                print("Skipping empty line in {} {} {} {}:{}".format(
                    self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                continue
            if BibleOrgSysGlobals.debugFlag:
                assert len(bookCode) == 3
                assert chapterNumberString.isdigit()
                assert verseNumberString.isdigit()

            if subverseNumberString:
                logging.warning(
                    _("subverseNumberString {!r} in {} {} {}:{}").format(
                        subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString))

            vText = vText.strip()  # Remove leading and trailing spaces
            if not vText:
                continue  # Just ignore blank verses I think
            if vText == "+":
                continue  # Not sure what this means in basic_english JHN 1:38

            chapterNumber = int(chapterNumberString)
            verseNumber = int(verseNumberString)
            if sequenceNumberString:
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumberString.isdigit()
                sequenceNumber = int(sequenceNumberString)
                if BibleOrgSysGlobals.debugFlag:
                    assert sequenceNumber > lastSequence or self.givenName in (
                        "gothic_latin",
                        "hebrew_bhs_consonants",
                        "hebrew_bhs_vowels",
                        "latvian_nt",
                        "ukrainian_1871",
                    )  # Why???
                lastSequence = sequenceNumber

            if bookCode != lastBookCode:  # We've started a new book
                if lastBookCode != -1:  # Better save the last book
                    self.stashBook(thisBook)
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "Unbound Bible Book object"
                thisBook.objectTypeString = "Unbound"
                lastBookCode = bookCode
                lastChapterNumber = lastVerseNumber = -1

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert chapterNumber > lastChapterNumber or BBB == "ESG"
                if chapterNumber == 0:
                    logging.info("Have chapter zero in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                thisBook.addLine("c", chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode, chapterNumberString, verseNumberString))
                continue
            if (BBB == "PSA" and verseNumberString == "1" and vText.startswith("<")
                    and self.givenName == "basic_english"):
                # Move Psalm titles to verse zero
                verseNumber = 0
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                if vText == lastVText:
                    logging.warning(
                        _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                else:
                    logging.warning(
                        _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                            verseNumber, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
            thisBook.addLine("v", verseNumberString + " " + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book (there won't be one if the file had no verse lines)
    if thisBook is not None:
        self.stashBook(thisBook)
    self.applySuppliedMetadata("Unbound")  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def load( self ):
    """
    Load a single MySword (SQLite) source file and load the book elements.

    Reads the single row of the 'Details' table into self.settingsDict,
    then walks through every book/chapter/verse reference of the
    GENERIC-KJV-66-ENG organisational system, fetching each verse from the
    'Bible' table and passing it to handleLine(). Each book that yielded at
    least one non-blank verse line is saved with self.saveBook().

    Returns early (after logging a critical error) if the Details row is
    missing or if it flags neither an OT nor an NT.
    The database connection is always closed before returning.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in filenameEndingsToAccept:
        logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
    elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
        logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

    connection = sqlite3.connect( self.sourceFilepath )
    try:
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        if row is None: # Empty Details table -- nothing tells us what the module contains
            logging.critical( "MySwordBible.load: No Details row found in {}".format( self.sourceFilename ) )
            return
        for key in row.keys(): self.settingsDict[key] = row[key]
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

        # Work out the first book and how many books we expect to step through
        hasOT, hasNT = self.settingsDict.get( 'OT' ), self.settingsDict.get( 'NT' )
        if hasOT and hasNT: BBB, booksExpected = 'GEN', 66
        elif hasOT: BBB, booksExpected = 'GEN', 39
        elif hasNT: BBB, booksExpected = 'MAT', 27
        else: # Previously this fell through and crashed on the unbound BBB
            logging.critical( "MySwordBible.load: {} claims to contain neither OT nor NT".format( self.sourceFilename ) )
            return

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        def startBook( bookBBB ):
            """ Create an empty book object plus its verse-count list and reference number. """
            newBook = BibleBook( self, bookBBB )
            newBook.objectNameString = "MySword Bible Book object"
            newBook.objectTypeString = "MySword"
            return newBook, BOS.getNumVersesList( bookBBB ), BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( bookBBB )

        # Create the first book
        thisBook, verseList, nBBB = startBook( BBB )
        numC, numV = len(verseList), verseList[0]

        C = V = 1
        bookCount = 0
        ourGlobals = { 'haveParagraph': False } # State shared with handleLine()
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            row = cursor.fetchone()
            line = None if row is None else row[0] # row is None if this reference is missing

            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( line, str ):
                if 'encryption' in self.settingsDict:
                    logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                    break # No use continuing with an encrypted module
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
            elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else:
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (We just ignore these for now)
                line = line.rstrip( '\r\n' )
                if '\r' in line or '\n' in line: # (in the middle)
                    logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )
                handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

            V += 1
            if V > numV: # Finished this chapter
                C += 1
                if C > numC: # Finished this book -- save it now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook, verseList, nBBB = startBook( BBB )
                    haveLines = False
                    numC, numV = len(verseList), verseList[0]
                    C = V = 1
                else: # next chapter only
                    numV = verseList[C-1]
                    V = 1

                # NOTE(review): the flattened original does not show whether this flush
                #   belongs at chapter level or verse level -- chapter level assumed here; confirm
                if ourGlobals['haveParagraph']:
                    thisBook.addLine( 'p', '' )
                    ourGlobals['haveParagraph'] = False
        cursor.close()
    finally:
        connection.close() # Previously only the cursor was closed, leaking the connection
    self.doPostLoadProcessing()
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    The constructor scans a folder for a single USFX XML file; load() parses
    that file and converts each <book> element into a BibleBook made of
    USFM-style marker/text lines (via addLine / appendToLastLine).
    """
    # Element tags that are treated as inline character formatting
    CHARACTER_FORMATTING_TAGS = ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',)


    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.

        sourceFolder is scanned for candidate files. If exactly one file looks
        like USFX, self.sourceFilename and self.sourceFilepath are set;
        otherwise they remain None.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder {!r} is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ): continue
                if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        self.possibleFilenames = foundFiles # Remembered for the fallback search in load()
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        numFound = 0
        lastFilenameFound = None
        looksHopeful = False # Set once we see an XML file (was an undefined name before)
        for thisFilename in sorted( foundFiles ):
            firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                continue
            looksHopeful = True
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2: print( " Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.

        Parses self.sourceFilepath, records the schema location and language
        code, and calls loadBook() for every <book> element. If that leaves
        self.books empty, falls back to scanning the candidate files found by
        the constructor for a leading '\\id ' line.
        Logs a critical error and returns if there is no filepath or the file
        cannot be parsed.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if self.sourceFilepath is None: # The constructor didn't find exactly one USFX file
            logging.critical( "USFXXMLBible.load: no USFX file was found in {!r}".format( self.sourceFolder ) )
            return

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: {!r}.".format( self.sourceFilepath, errorString ) )
            return
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (usfx) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None # Only used in the warning message below
            for element in self.tree:
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'cff3' )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in {!r}".format( self.sourceFolder ) )
            # NOTE(review): this fallback looks for a USFM-style '\id ' line and uses USFXXMLBibleBook,
            #   which is not defined in this class -- confirm that it is still wanted
            for thisFilename in self.possibleFilenames:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath, encoding=self.encoding ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Have possible USFX ID {!r}".format( USXId ) )
                            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if BibleOrgSysGlobals.verbosityLevel > 2: print( "BBB is {!r}".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.

        Creates self.thisBook (a BibleBook), fills it from the child elements
        of bookElement and finally saves it with self.saveBook().
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        # NOTE(review): these two checks look at the root element (self.tree), not at bookElement -- confirm intent
        BibleOrgSysGlobals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            # mainLocation already contains BBB, so only C:V is appended
            #   (the old four-placeholder format string rendered BBB:C and dropped V)
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'vsg3' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'jsa0' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset':
                        charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # Join only the parts that exist (a missing charset used to raise TypeError)
                self.thisBook.addLine( 'ide', ' '.join( bit for bit in (charset, ideText) if bit ) )
            elif element.tag == 'h':
                hText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'dj35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'hs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.addLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ss13' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if level is None: # A missing level used to raise TypeError
                    logging.warning( "USFXXMLBible.loadBook: missing level attribute in {}".format( location ) )
                self.thisBook.addLine( 'toc' + (level if level else ''), clean(tocText) )
            elif element.tag == 'c':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
                # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.addLine( marker, sText )
                for subelement in element:
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                lastParagraphV = self.loadParagraph( element, location, BBB, C )
                # Keep the previous verse number if the paragraph contained no verse milestone
                if lastParagraphV is not None: V = lastParagraphV
            elif element.tag == 'b':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'nd04' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.addLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'od01' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'gd92' )
                idField = None
                for attrib,value in element.items():
                    if attrib == 'id': idField = value
                    else:
                        logging.warning( _("dv35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if idField and text is None:
                    text = idField
                elif idField: # Only complain if there really was an id that we didn't use
                    logging.warning( _("dve4 Unprocessed idField ({}) in {}").format( idField, location ) )
                if text is None:
                    logging.critical( "Why is {} empty at {}".format( marker, location ) )
                assert( text is not None )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 've': # What's this in Psalms: <c id="4" /><ve /><d>For the Chief Musician; on stringed instruments. A Psalm of David.</d>
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'kds3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ks29' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'kj24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js91' )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring 've' field", BBB, C, V )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, BBB, C ):
        """
        Load the paragraph (p or q) container from the XML data file.

        Adds one line for the paragraph itself and then processes the child
        elements (verse milestones, notes, character formatting, etc.).

        Returns the id of the last verse milestone seen in this paragraph,
        or None if the paragraph contained no verse milestone.
        """
        V = None
        pTag, pText = paragraphElement.tag, clean(paragraphElement.text)
        BibleOrgSysGlobals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm':
                sfm = value
            elif attrib == 'level':
                level = value
            elif attrib == 'style':
                style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        if sfm:
            assert( pTag == 'p' )
            pTag = sfm # The sfm attribute overrides the generic 'p' tag
        if level:
            pTag += level # Could be q, mt, etc.
        if style:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring {!r} style".format( style ) )

        self.thisBook.addLine( pTag, '' if pText is None else pText )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'crc2' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'lct3' )
                V = None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.addLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'lsc3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'mfy4' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'bd24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                self.loadFootnote( element, location, BBB, C, V )
            elif element.tag == 'x':
                self.loadCrossreference( element, location )
            elif element.tag in self.CHARACTER_FORMATTING_TAGS: # character formatting
                self.loadCharacterFormatting( element, location, BBB, C, V )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',):
                    print( "cs sfm got", repr(sfm) )
                # 'text or ""' stops a missing text from being written out as the word "None"
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text or '', sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'kdf0' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'lkj1' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text or '', (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if BibleOrgSysGlobals.debugFlag: halt
            elif element.tag == 'milestone': # e.g., <milestone sfm="pb" attribute=""/> (pb = explicit page break)
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'jzx2' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ms23' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'dw24' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("mcd2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('pb',):
                    print( "milestone sfm got", repr(sfm) )
                self.thisBook.addLine( sfm, '' )
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location, BBB, C, V ):
        """
        Append an inline character-formatting element to the last line.

        Writes the opening marker and the element text, processes any
        footnote subelements, then writes the closing marker followed by
        the element's tail text.
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'sd12' )
        # 'text or ""' stops a missing text from being written out as the word "None"
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text or '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'f':
                self.loadFootnote( subelement, sublocation, BBB, C, V )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        Append a figure element to the last line as a \\fig ... \\fig* field.

        The seven figure fields are written in a fixed order separated by
        '|' characters; fields that are absent are left empty.
        """
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'ff36' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'cf35' )
        fieldOrder = ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',)
        figDict = { fieldName:'' for fieldName in fieldOrder }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = '|'.join( figDict[fieldName] for fieldName in fieldOrder )
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        Load a table element: one 'tr' line per row, with each cell
        (th, thr, tc, tcr plus optional level) appended to that line.
        """
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'kg92' )
        BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ka92' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                self.thisBook.addLine( 'tr', '' )
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'sg32' )
                BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    BibleOrgSysGlobals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                    marker = tag + (level if level else '')
                    # 'text or ""' stops an empty cell from being written out as the word "None"
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text or '' ) )
            else:
                # C and V are not available in this method (the old message referenced them and raised NameError)
                logging.warning( _("kv64 Unprocessed {} element after {} in {}").format( subelement.tag, self.thisBook.BBB, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location, BBB, C, V ):
        """
        Handles footnote fields, including xt field.

        Appends ' \\f caller text', then each subfield, then '\\f*' plus the
        tail text to the last line of self.thisBook.

        NOTE(review): the bare `halt` statements are kept from the original;
        `halt` is not defined in this class, so presumably they are a
        deliberate way of stopping on unexpected input -- confirm.
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq','xt',) )
            if marker=='ref':
                assert( fText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                # 'fText or ""' stops a field without direct text from being written out as the word "None"
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText or '' ) )
                if marker=='xt' or marker[0]=='f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2 == 'ref':
                            if fText2:
                                self.thisBook.appendToLastLine( fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value # OSIS style reference, e.g., '1SA.27.8'
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else:
                                if debuggingThisModule: halt
                        elif marker2 in self.CHARACTER_FORMATTING_TAGS: # character formatting
                            self.loadCharacterFormatting( sub2element, sub2location, BBB, C, V )
                        else:
                            print( 'Ignored marker2', repr(marker2), BBB, C, V )
                            if debuggingThisModule: halt
                        if fTail2: self.thisBook.appendToLastLine( fTail2 )
                elif marker in self.CHARACTER_FORMATTING_TAGS: # character formatting
                    self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                else:
                    print( 'Ignored marker', repr(marker), BBB, C, V )
                    halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

        Appends ' \\x caller', then each subfield, then '\\x*' plus the tail
        text to the last line of self.thisBook.

        NOTE(review): the bare `halt` statements are kept from the original;
        `halt` is not defined in this class, so presumably they are a
        deliberate way of stopping on unexpected input -- confirm.
        """
        tail = clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                # 'xText or ""' stops a field without direct text from being written out as the word "None"
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText or '' ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            if xText2:
                                self.thisBook.appendToLastLine( xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                        if xTail2: self.thisBook.appendToLastLine( xTail2 )
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
def load( self ):
    """
    Load a single Forge for SwordSearcher source file and load book elements.

    The file is read line by line (blank lines are discarded):
        * Lines starting with ';' are header/comment lines. The recognised
            ones ('; TITLE:', '; ABBREVIATION:', '; HAS ITALICS',
            '; HAS FOOTNOTES', '; HAS REDLETTER') are saved into the settings
            dictionary when they carry a value; others are logged and skipped.
        * Lines starting with '$$ ' are pointers. '$$ {NAME}' opens a metadata
            section; anything else is taken to be a 'Book C:V' reference
            describing the text on the following line(s).
        * Any other line is either metadata content (inside a metadata
            section) or verse text for the most recent reference.

    Verse text has <scripref> elements, {footnotes}, [added text] and
        +r/red-letter-r/ spans converted to USFM-style \\x, \\f, \\add and \\wj
        fields before being added to the current BibleBook.

    Completed books are passed to self.stashBook(), the collected settings are
        stored in self.suppliedMetadata['Forge4SS'] and applied, and finally
        self.doPostLoadProcessing() is called.

    Has no return value.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

    global BOS66, BOS81, BOSx
    if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
    if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
    if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # Recognised header prefixes and the settings key each one is stored under.
    # The value is whatever follows the prefix, so the offsets can't drift out of
    #   step with the prefix text (hand-counted offsets previously left a stray
    #   ':' or trailing letter in some values).
    headerFields = (
        ( '; TITLE:', 'TITLE' ),
        ( '; ABBREVIATION:', 'ABBREVIATION' ),
        ( '; HAS ITALICS', 'HAS_ITALICS' ),
        ( '; HAS FOOTNOTES', 'HAS_FOOTNOTES' ),
        ( '; HAS REDLETTER', 'HAS_REDLETTER' ),
        )

    # Numbered book names get their internal space removed so that the book code
    #   can be separated from the C:V part with a single split (order is significant)
    numberedBookJoins = (
        ( '1 K','1K' ), ( '2 K','2K' ),
        ( '1 Chr','1Chr' ), ( '2 Chr','2Chr' ),
        ( '1 Cor','1Cor' ), ( '2 Cor','2Cor' ),
        ( '1 Thess','1Thess' ), ( '2 Thess','2Thess' ),
        ( '1 Tim','1Tim' ), ( '2 Tim','2Tim' ),
        ( '1 Pet','1Pet' ), ( '2 Pet','2Pet' ),
        ( '1 J','1J' ), ( '2 J','2J' ), ( '3 J','3J' ),
        )

    # Short book codes which are mapped directly rather than being guessed
    bookCodeOverrides = { 'Ge':'GEN', 'Le':'LEV', 'La':'LAM' }

    footnoteRegex = re.compile( r'\{(.+?)\}' )
    addedTextRegex = re.compile( r'\[(.+?)\]' )
    redLetterRegex = re.compile( r'\+r/(.+?)-r/' )

    def replaceEach( regex, makeReplacement, text ):
        """
        Repeatedly replace the first match of regex in text with
            makeReplacement( group 1 ) until there are no matches left.

        The search restarts from the beginning after every replacement
            (which is not the same as a single-pass re.sub).
        """
        match = regex.search( text )
        while match:
            text = text[:match.start()] + makeReplacement( match.group(1) ) + text[match.end():]
            match = regex.search( text )
        return text
    # end of replaceEach

    lineCount = 0
    bookCode = BBB = metadataName = None
    metadataContents = ''
    chapterNumber = verseNumber = None
    chapterNumberString = verseNumberString = ''
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    numChapters = 0
    thisBook = None
    bookStashed = False # True once thisBook has been given to stashBook()
    settingsDict = {}
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line.endswith( '\n' ): line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines

            if lineCount == 1:
                if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                    logging.info( "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)" )
                    line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                match = re.search( r'^; TITLE:\s', line )
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        print( "First line got type {!r} match from {!r}".format( match.group(0), line ) )
                else:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print( "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue # Discard the unexpected first line

            # Process header stuff (every header prefix starts with a semicolon)
            if line[0] == ';':
                for prefix, settingsKey in headerFields:
                    if line.startswith( prefix ):
                        value = line[len(prefix):]
                        if not prefix.endswith( ':' ) and value.startswith( ':' ):
                            value = value[1:] # The colon is optional after the HAS... flags
                        value = value.strip()
                        if value: settingsDict[settingsKey] = value # A bare flag has nothing to save
                        break
                else: # no known prefix matched
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}".format( line ) )
                continue # Header and comment lines never contain verse text

            # Process the main segment
            if line.startswith( '$$ ' ):
                # Any pointer line finishes the metadata section which was in progress
                if metadataName and metadataContents:
                    settingsDict[metadataName] = metadataContents
                metadataName = None

                pointer = line[3:]
                if pointer and pointer[0]=='{' and pointer[-1]=='}':
                    metadataName = pointer[1:-1]
                    metadataContents = ''
                else: # let's assume it's a BCV reference
                    for spacedName, joinedName in numberedBookJoins:
                        pointer = pointer.replace( spacedName, joinedName )
                    B_CV_Bits = pointer.split( ' ', 1 )
                    if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                        bookCode, CVString = B_CV_Bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                        chapterNumber = int( chapterNumberString )
                        verseNumber = int( verseNumberString )
                        if bookCode != lastBookCode: # We've started a new book
                            BBB = bookCodeOverrides.get( bookCode )
                            if not BBB:
                                for BOS in ( BOS66, BOS81, BOSx ): # Try to guess
                                    BBB = BOS.getBBBFromText( bookCode )
                                    if BBB: break
                    else:
                        # Only report things that are certain to exist at this point
                        logging.error( "Unexpected number of bits {} {} {!r}".format( self.givenName, len(B_CV_Bits), B_CV_Bits ) )
                continue # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName:
                metadataContents += ('\n' if metadataContents else '') + line
                continue

            if not bookCode: # No bookCode yet -- there's no reference to attach this text to
                logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}".format( line ) )
                continue

            vText = line
            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
            vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
            # Convert {stuff} to footnotes
            vText = replaceEach( footnoteRegex,
                        lambda content: '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, content ),
                        vText )
            # Convert [stuff] to added fields
            vText = replaceEach( addedTextRegex, '\\add {}\\add*'.format, vText )
            # Convert +r/This text is red-letter-r/ to wj fields
            vText = replaceEach( redLetterRegex, '\\wj {}\\wj*'.format, vText )
            # Final check for unexpected remaining formatting
            if any( badChar in vText for badChar in '{}[]/' ):
                logging.warning( "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )

            if bookCode != lastBookCode: # We've started a new book
                if thisBook is not None and not bookStashed: # Better save the last book
                    # Only once: lastBookCode isn't advanced for an unrecognised book,
                    #   so we get back here for every one of its lines
                    self.stashBook( thisBook )
                    bookStashed = True
                if BBB:
                    if BBB in self:
                        logging.critical( "Have duplicated {} book in {}".format( BBB, self.givenName ) )
                        if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    bookStashed = False
                    numChapters = len( BOSx.getNumVersesList( BBB ) )
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical( "ForgeForSwordSearcherBible could not figure out {!r} book code".format( bookCode ) )
                    if BibleOrgSysGlobals.debugFlag: halt

            if not BBB: continue # Can't save text for a book that we couldn't identify

            if chapterNumber != lastChapterNumber: # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif chapterNumber > numChapters:
                    logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                thisBook.addLine( 'c', chapterNumberString )
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber==lastVerseNumber and vText==lastVText: # Exact repeat -- drop it
                logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                continue
            # The next two cases are only reported -- the verse is still added below
            if verseNumber < lastVerseNumber:
                logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            elif verseNumber == lastVerseNumber: # (the text must differ or we'd have skipped it above)
                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

            # Remember the text as it is before the paragraph marker is removed
            #   so that the duplicate test above always compares like with like
            lastVText = vText

            # Check for paragraph markers
            if vText and vText[0]=='¶':
                thisBook.addLine( 'p', '' )
                vText = vText[1:].lstrip()

            thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            bookStashed = False # The book has changed since it was last stashed (if ever)
            lastVerseNumber = verseNumber

    # A metadata section which runs to the end of the file has no following pointer to finish it
    if metadataName and metadataContents:
        settingsDict[metadataName] = metadataContents

    # Save the final book
    if thisBook is not None and not bookStashed: self.stashBook( thisBook )

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata( 'Forge4SS' ) # Copy some to self.settingsDict

    self.doPostLoadProcessing()
# end of load