class VerseViewXMLBible(Bible):
    """
    Class for reading, validating, and converting VerseViewXMLBible XML.

    The expected layout is a <bible> root containing a handful of header
    elements (fname, revision, title, font, copyright, sizefactor) followed
    by <b> (book) elements, which contain <c> (chapter) elements, which in
    turn contain <v> (verse) elements.  Books, chapters and verses each carry
    their name/number in an 'n' attribute.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'bible'

    filenameTag = 'fname'
    revisionTag = 'revision'
    titleTag = 'title'
    fontTag = 'font'
    copyrightTag = 'copyright'
    sizefactorTag = 'sizefactor'

    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the VerseView Bible object.

        sourceFolder and givenName are joined to form the path of the XML
            file that load() will later parse.
        encoding is stored on the object (it is not used by this class's
            own XML loading).

        Nothing is loaded here -- call load() to parse the file.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'VerseView XML Bible object'
        self.objectTypeString = 'VerseView'

        # Now we can set our object variables
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = self.header = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        #   (this only reports the problem -- load() will fail later)
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint('Quiet', debuggingThisModule,
                   "VerseViewXMLBible: File {!r} is unreadable".format(self.sourceFilepath))

        self.name = self.givenName
    # end of VerseViewXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        Header elements are checked and (where relevant) copied into
            self.suppliedMetadata['VerseView'].
        Each book element is numbered in file order (starting from 1)
            and handed to __validateAndExtractBook.
        """
        vPrint('Info', debuggingThisModule, _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Fail here if we didn't load anything at all
            #   (explicit len() because truth-testing an Element is deprecated)
            assert self.XMLTree is not None and len(self.XMLTree)

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        metadata = {}
        self.suppliedMetadata['VerseView'] = metadata

        # Header elements all get the same checks:
        #   tag -> (description used in messages, metadata key to save the text under or None)
        headerElements = {
            VerseViewXMLBible.filenameTag: ('filename', None),
            VerseViewXMLBible.revisionTag: ('revision', 'Revision'),
            VerseViewXMLBible.titleTag: ('title', 'Title'),
            VerseViewXMLBible.fontTag: ('font', 'Font'),
            VerseViewXMLBible.copyrightTag: ('copyright', 'Copyright'),
            VerseViewXMLBible.sizefactorTag: ('sizefactor', None),
        }

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            location = "VerseView XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                if element.tag in headerElements:
                    description, metadataKey = headerElements[element.tag]
                    sublocation = description + " in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
                    if metadataKey is not None:
                        metadata[metadataKey] = element.text
                    elif element.tag == VerseViewXMLBible.sizefactorTag:
                        if BibleOrgSysGlobals.debugFlag:
                            assert element.text == '1'
                    # (the filename element is checked but its text is not saved)
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error("xk15 Expected to find {!r} but got {!r}".format(
                        VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all meant to be compulsory, but use .get() so that a file
            #   which lacks one of them is reported (as None) rather than crashing
            for metadataKey in ('Revision', 'Title', 'Font', 'Copyright'):
                vPrint('Quiet', debuggingThisModule,
                       "{} is {!r}".format(metadataKey, metadata.get(metadataKey)))

        self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
    # end of VerseViewXMLBible.load

    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        book is the XML book element.
        bookNumber is the position (from 1) of this book in the file;
            it is used to cross-check (and if necessary override)
            the book code deduced from the book's 'n' (name) attribute.
        """
        vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

        # Process the book attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in book element".format(
                    attrib, value))

        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
            if BBB is None:  # Try again without any accents
                adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
                if adjustedBookName != bookName:
                    BBB = self.genericBOS.getBBBFromText(adjustedBookName)

        # Just double check using the book number (which wins if they disagree)
        BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(bookNumber)
        if BBB2 != BBB:
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                vPrint('Quiet', debuggingThisModule,
                       "Assuming that book {} {!r} is {} (not {})".format(
                           bookNumber, bookName, BBB2, BBB))
            BBB = BBB2

        if not BBB:
            # Don't lose a book silently
            logging.error("Unable to determine book code for book {} {!r}".format(
                bookNumber, bookName))
            return

        vPrint('Info', debuggingThisModule, _("Validating {} {}…").format(BBB, bookName))
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'VerseView XML Bible Book object'
        thisBook.objectTypeString = 'VerseView'
        for element in book:
            if element.tag == VerseViewXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("vb26 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.chapterTag, element.tag))
        vPrint('Info', debuggingThisModule, "  Saving {} into results…".format(BBB))
        self.stashBook(thisBook)
    # end of VerseViewXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        A 'c' line is added to thisBook if the chapter has an 'n' attribute;
            if not, an error is logged but the verses are still processed.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML chapter…"))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in chapter element".format(
                    attrib, value))
        if chapterNumber:
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error("Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == VerseViewXMLBible.verseTag:
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook, element)
            else:
                logging.error("sv34 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.verseTag, element.tag))
    # end of VerseViewXMLBible.__validateAndExtractChapter

    def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook, verse):
        """
        Check/validate and extract verse data from the given XML book record
            finding and saving verse elements.

        A 'v' line is added to thisBook only if the verse has some
            (non-whitespace) text.  Newlines inside the text become spaces.
        Verse subelements (notes, styled portions) are not handled here:
            they are only reported by the no-subelements check.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in verse element".format(
                    attrib, value))
        if BibleOrgSysGlobals.debugFlag:
            assert verseNumber
        if verseNumber is None:
            # Can't build a verse line without a number
            #   (this used to crash with a TypeError when not in debug mode)
            logging.error("Missing 'n' attribute in {}".format(location))
            return

        vText = '' if verse.text is None else verse.text.strip()
        if vText:  # This is the main text of the verse
            if '\n' in vText:
                vPrint('Quiet', debuggingThisModule,
                       "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                       .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.addLine('v', verseNumber + ' ' + vText)
    # end of VerseViewXMLBible.__validateAndExtractVerse
def load( self ):
    """
    Load a single source file and load book elements.

    The file type is guessed from the first line:
        type 1: lines like "Ge 1:1 In the beginning…"
                    (book code, space, C:V, space, text)
        type 2: lines starting with an 8-digit BBCCCVVV reference,
                    then a tab, then the text
        type 3: as type 2, but preceded by '# name: value' header lines
                    (the first being '# language_name:')

    Books are stashed as they are completed.
    Any header fields found are saved in self.suppliedMetadata['VPL'].
    """
    vPrint('Info', debuggingThisModule, _("Loading {}…").format(self.sourceFilepath))

    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganisationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganisationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Type 3 header lines: (line prefix, settings key)
    headerFields = (
        ('# language_name:', 'LanguageName'),
        ('# closest ISO 639-3:', 'ISOLanguageCode'),
        ('# year_short:', 'Year.short'),
        ('# year_long:', 'Year.long'),
        ('# title:', 'WorkTitle'),
        ('# URL:', 'URL'),
        ('# copyright_short:', 'Copyright.short'),
        ('# copyright_long:', 'Copyright.long'),
    )
    # Book numbers above 66 as used in type 2 and 3 files
    bnDict = {
        67: 'TOB', 68: 'JDT', 69: 'ESG', 70: 'WIS', 71: 'SIR', 72: 'BAR', 73: 'LJE',
        74: 'PAZ', 75: 'SUS', 76: 'BEL', 77: 'MA1', 78: 'MA2', 79: 'MA3', 80: 'MA4',
        81: 'ES1', 82: 'ES2', 83: 'MAN', 84: 'PS2', 85: 'PSS', 86: 'ODE',
    }

    lineCount = 0
    vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = None
    lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == chr(65279):  # U+FEFF or \ufeff
                    logging.info("      VPLBible.load: Detected Unicode Byte Order Marker (BOM)")
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                # Try to identify the VPL type
                match = re.search(r'^(\w{2,5}?)\s(\d{1,3})[:\.](\d{1,3})\s', line)
                if match:
                    vplType = 1
                else:
                    match = re.search(r'^(\d{8})\s', line)
                    if match:
                        vplType = 2
                    else:
                        match = re.search(r'^# language_name:\s', line)
                        if match:
                            vplType = 3
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        vPrint('Quiet', debuggingThisModule,
                               "First line got type #{} {!r} match from {!r}".format(
                                   vplType, match.group(0), line))
                else:
                    vPrint('Verbose', debuggingThisModule,
                           "VPLBible.load: (unexpected) first line was {!r} in {}".format(
                               line, self.sourceFilepath))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue

            # Process header stuff
            if vplType == 3 and line[0] == '#':
                for prefix, settingName in headerFields:
                    if line.startswith(prefix):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available':
                            settingsDict[settingName] = string
                        break
                else:  # not one of the known header fields
                    logging.warning("VPLBible.load {} is skipping unknown line: {}".format(vplType, line))
                continue  # Header and comment lines never contain verse text

            # Process the main segment
            psalmTitle = None  # Gets added (after any chapter marker) further down
            if vplType == 1:
                bits = line.split(' ', 2)
                if len(bits) != 3 or ':' not in bits[1]:
                    # Skip the line -- carrying on would either hit unset variables
                    #   or reprocess the values left over from the previous line
                    vPrint('Quiet', debuggingThisModule, "Unexpected number of bits",
                           self.givenName, BBB, len(bits), bits)
                    continue
                bookCodeText, CVString, vText = bits
                # Any extra colon ends up in the verse field and is rejected below
                chapterNumberString, verseNumberString = CVString.split(':', 1)
                if chapterNumberString == '':
                    chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL

                if not bookCodeText and not chapterNumberString and not verseNumberString:
                    vPrint('Quiet', debuggingThisModule, "Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag: assert 2 <= len(bookCodeText) <= 4
                if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                if not verseNumberString.isdigit():
                    logging.error("Invalid verse number field at {}/{} {}:{!r}".format(
                        bookCodeText, BBB, chapterNumberString, verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert verseNumberString.isdigit()
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    # Some abbreviations are ambiguous without knowing the previous book
                    if bookCodeText == 'Le' and lastBBB == 'GEN':
                        BBB = 'LEV'
                    elif bookCodeText in ('Jud',) and lastBBB == 'JOS':
                        BBB = 'JDG'
                    else:  # Try to guess
                        BBB = BOS66.getBBBFromText(bookCodeText)
                        if not BBB: BBB = BOS81.getBBBFromText(bookCodeText)
                        if not BBB: BBB = BOSx.getBBBFromText(bookCodeText)
                        if not BBB: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText(bookCodeText)
                    if not BBB:
                        logging.critical("VPL Bible: Unable to determine book code from text {!r} after {!r}={}".format(
                            bookCodeText, lastBookCodeText, lastBBB))
                        halt

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons» are for the Titles in the Book of Psalms.
                vText = vText.replace('[', '\\add ').replace(']', '\\add*') \
                             .replace('<', '\\wj ').replace('>', '\\wj*')
                if vText and vText[0] == '«':
                    if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                        title, closer, remainder = vText[1:].partition('»')
                        if closer:  # Leave the text alone if the title is never closed
                            psalmTitle = title
                            vText = remainder.lstrip()

                # Handle the verse info
                # NOTE(review): '<' has already been replaced above, so this
                #   startswith('<') test can never succeed -- confirm what was intended
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith('<') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2, 3):
                bits = line.split('\t', 1)
                reference = bits[0]
                if len(bits) != 2 or len(reference) < 6 or not reference.isdigit():
                    logging.error("VPLBible.load{} is skipping malformed line: {}".format(vplType, line))
                    continue
                # Going via int() copes with chapter or verse zero
                #   (stripping the leading zeroes used to leave an empty string)
                bookCodeText = int(reference[:2])
                chapterNumber = int(reference[2:5])
                verseNumber = int(reference[5:])
                chapterNumberString, verseNumberString = str(chapterNumber), str(verseNumber)

                # Remove the spaces that these files have around punctuation
                vText = bits[1].replace(' ,', ',').replace(' .', '.').replace(' ;', ';').replace(' :', ':') \
                               .replace(' !', '!').replace(' )', ')').replace(' ]', ']').replace(' ”', '”') \
                               .replace('“ ', '“').replace('( ', '(').replace('[ ', '[')

                if bookCodeText != lastBookCodeText:  # We've started a new book
                    lastBBB = BBB
                    if 1 <= bookCodeText <= 66:
                        BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(bookCodeText)
                    else:  # An unknown number gives None which is reported below
                        BBB = bnDict.get(bookCodeText)

            else:
                logging.critical('Unknown VPL type {}'.format(vplType))
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

            if bookCodeText:
                if bookCodeText != lastBookCodeText:  # We've started a new book
                    if thisBook is not None:  # Better save the last book
                        self.stashBook(thisBook)
                        thisBook = None  # so it can't get stashed a second time
                    if BBB:
                        if BBB in self:
                            logging.critical("Have duplicated {} book in {}".format(BBB, self.givenName))
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'VPL Bible Book object'
                        thisBook.objectTypeString = 'VPL'
                        numChapters = len(BOSx.getNumVersesList(BBB))
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical("VPLBible{} could not figure out {!r} book code".format(vplType, bookCodeText))
                        if BibleOrgSysGlobals.debugFlag: halt
                    # Remember the code even if we failed so that
                    #   the failure is only reported once per book
                    lastBookCodeText = bookCodeText

                if BBB:
                    if chapterNumber != lastChapterNumber:  # We've started a new chapter
                        if BibleOrgSysGlobals.debugFlag:
                            assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                        if chapterNumber == 0:
                            logging.info("Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                        elif chapterNumber > numChapters:
                            logging.error("Have high chapter number in {} {} {} {}:{} (expected max of {})".format(
                                self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, numChapters))
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumber = chapterNumber
                        lastVerseNumber = -1

                    # Handle the verse info
                    if verseNumber == lastVerseNumber and vText == lastVText:
                        logging.warning(_("Ignored duplicate verse line in {} {} {} {}:{}").format(
                            self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                        continue
                    if verseNumber < lastVerseNumber:
                        logging.warning(_("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                            lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                    elif verseNumber == lastVerseNumber:
                        if vText == lastVText:
                            logging.warning(_("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))
                        else:
                            logging.warning(_("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString))

                    # The Psalm title has to go into the Psalms book after the chapter marker
                    #   (so it can't be added until the book and chapter have been handled above)
                    if psalmTitle is not None:
                        thisBook.addLine('d', psalmTitle)

                    # Check for paragraph markers
                    if vText and vText[0] == '¶':
                        thisBook.addLine('p', '')
                        vText = vText[1:].lstrip()

                    thisBook.addLine('v', verseNumberString + ' ' + vText)
                    lastVText = vText
                    lastVerseNumber = verseNumber

            else:  # No bookCodeText yet
                logging.warning("VPLBible.load{} is skipping unknown pre-book line: {}".format(vplType, line))

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def load(self):
    """
    Load a single source file and load book elements.

    The file at self.sourceFilepath is read line by line using self.encoding:
      * lines starting with ';' are header/comment lines -- the recognised
        ones are copied into the settings dictionary, the others are skipped
        with a warning
      * lines starting with '$$ ' are pointers -- either '$$ {NAME}' which
        starts a block of metadata text, or a 'Book C:V' reference which
        applies to the text on the following line
      * every other line is metadata text (inside a metadata block) or
        verse text.

    Inline markup in verse text (<scripref>, {footnote}, [added text] and
    +r/red letter-r/) is converted to backslash markers before the verse
    is added to the current book.

    Side effects: completed books are passed to self.stashBook(), collected
    settings are stored in self.suppliedMetadata['Forge4SS'] and applied
    with self.applySuppliedMetadata(), and self.doPostLoadProcessing() is
    called at the end.
    """
    vPrint('Info', debuggingThisModule,
           _("Loading {}…").format(self.sourceFilepath))

    # The organisational systems are module-level and created on first use
    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganisationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganisationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    # Book codes that are mapped directly rather than being guessed
    fixedBookCodes = {'Ge': 'GEN', 'Le': 'LEV', 'La': 'LAM'}

    lineCount = 0
    bookCode = BBB = metadataName = None
    metadataContents = ''
    # Initialised here so that messages issued before the first valid
    #   reference cannot fail on an unbound name
    chapterNumber = verseNumber = None
    chapterNumberString = verseNumberString = None
    numChapters = 0
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding) as myFile:
        for line in myFile:
            lineCount += 1
            line = line.rstrip('\n')  # Remove the trailing newline character
            if not line:
                continue  # Just discard blank lines

            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == '\ufeff':
                    logging.info(
                        " ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)")
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search('^; TITLE:\\s', line)
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        vPrint('Quiet', debuggingThisModule,
                               "First line got type {!r} match from {!r}".format(
                                   match.group(0), line))
                else:
                    # An unexpected first line is reported and then skipped
                    vPrint('Verbose', debuggingThisModule,
                           "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                           .format(line, self.sourceFilepath))
                    # NOTE(review): 'halt' is not defined in the visible code --
                    #   presumably a deliberate stop-here while debugging; confirm
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt
                    continue

            # Process header stuff
            # NOTE(review): the offsets used for the two HAS FOOTNOTES forms and
            #   for HAS REDLETTER are one less than the length of the prefix
            #   tested, so the stored value starts with the last character of
            #   that prefix. Left unchanged because using the prefix length
            #   would make value-less flag lines store nothing -- confirm the
            #   intended values.
            if line.startswith('; TITLE:'):
                string = line[8:].strip()
                if string:
                    settingsDict['TITLE'] = string
                continue
            elif line.startswith('; ABBREVIATION:'):
                string = line[15:].strip()
                if string:
                    settingsDict['ABBREVIATION'] = string
                continue
            elif line.startswith('; HAS ITALICS'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_ITALICS'] = string
                continue
            elif line.startswith('; HAS FOOTNOTES:'):
                string = line[15:].strip()
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            elif line.startswith('; HAS FOOTNOTES'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            elif line.startswith('; HAS REDLETTER'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_REDLETTER'] = string
                continue
            elif line[0] == ';':
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                    .format(line))
                continue  # Just discard comment lines

            # Process the main segment
            if line.startswith('$$ '):
                # Any pointer line ends the metadata block that was in progress
                if metadataName and metadataContents:
                    settingsDict[metadataName] = metadataContents
                metadataName = None

                pointer = line[3:]
                if pointer and pointer[0] == '{' and pointer[-1] == '}':
                    metadataName = pointer[1:-1]
                    metadataContents = ''
                else:  # let's assume it's a BCV reference
                    # Close up numbered book names so that the book code and
                    #   the C:V part are separated by exactly one space
                    pointer = pointer.replace('1 K', '1K').replace('2 K', '2K') \
                        .replace('1 Chr', '1Chr').replace('2 Chr', '2Chr') \
                        .replace('1 Cor', '1Cor').replace('2 Cor', '2Cor') \
                        .replace('1 Thess', '1Thess').replace('2 Thess', '2Thess') \
                        .replace('1 Tim', '1Tim').replace('2 Tim', '2Tim') \
                        .replace('1 Pet', '1Pet').replace('2 Pet', '2Pet') \
                        .replace('1 J', '1J').replace('2 J', '2J').replace('3 J', '3J')
                    B_CV_Bits = pointer.split(' ', 1)
                    if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                        bookCode, CVString = B_CV_Bits
                        chapterNumberString, verseNumberString = CVString.split(':')
                        chapterNumber = int(chapterNumberString)
                        verseNumber = int(verseNumberString)
                        if bookCode != lastBookCode:  # We've started a new book
                            # Try the fixed codes, then guess with each system in turn
                            BBB = (fixedBookCodes.get(bookCode)
                                   or BOS66.getBBBFromText(bookCode)
                                   or BOS81.getBBBFromText(bookCode)
                                   or BOSx.getBBBFromText(bookCode))
                    else:
                        vPrint('Quiet', debuggingThisModule,
                               "Unexpected number of bits", self.givenName, BBB,
                               bookCode, chapterNumberString, verseNumberString,
                               len(B_CV_Bits), B_CV_Bits)
                continue  # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName:
                metadataContents += ('\n' if metadataContents else '') + line
                continue

            vText = line
            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = vText.replace('(<scripref>', '\\x - \\xt ').replace('</scripref>)', '\\x*')
            vText = vText.replace('<scripref>', '\\x - \\xt ').replace('</scripref>', '\\x*')

            # Convert {stuff} to footnotes
            #   (callables are used as replacements so that the backslashes
            #    in the markers are never treated as regex escapes)
            vText = re.sub(
                r'\{(.+?)\}',
                lambda m: '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                    chapterNumber, verseNumber, m.group(1)),
                vText)
            # Convert [stuff] to added fields
            vText = re.sub(
                r'\[(.+?)\]',
                lambda m: '\\add {}\\add*'.format(m.group(1)),
                vText)
            # Convert +r/This text is red-letter-r/ to wj fields
            vText = re.sub(
                r'\+r/(.+?)-r/',
                lambda m: '\\wj {}\\wj*'.format(m.group(1)),
                vText)

            # Final check for unexpected remaining formatting
            if any(badChar in vText for badChar in '{}[]/'):
                logging.warning(
                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                    .format(BBB, chapterNumberString, verseNumberString, vText))

            if not bookCode:  # No bookCode yet
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                    .format(line))
                continue

            if bookCode != lastBookCode:  # We've started a new book
                # Save the last book exactly once: lastBookCode is not advanced
                #   for an unrecognised book code, so this branch can be
                #   entered again for the following lines
                if thisBook is not None:
                    self.stashBook(thisBook)
                    thisBook = None
                if BBB:
                    if BBB in self:
                        logging.critical(
                            "Have duplicated {} book in {}".format(BBB, self.givenName))
                    if BibleOrgSysGlobals.debugFlag:
                        assert BBB not in self
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    numChapters = len(BOSx.getNumVersesList(BBB))
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical(
                        "ForgeForSwordSearcherBible could not figure out {!r} book code"
                        .format(bookCode))
                    if BibleOrgSysGlobals.debugFlag:
                        halt

            if not BBB or thisBook is None:
                continue  # Nowhere to put this verse

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    # Esther Greek might be an exception
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                elif chapterNumber > numChapters:
                    logging.error(
                        "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                        .format(self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString, numChapters))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                        self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
                continue
            # The two cases below are only reported -- the verse is still added
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                        lastVerseNumber, verseNumber, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))
            elif verseNumber == lastVerseNumber:
                # Same number but different text (identical text was handled above)
                logging.warning(
                    _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                        verseNumber, self.givenName, BBB, bookCode,
                        chapterNumberString, verseNumberString))

            # Check for paragraph markers
            if vText and vText[0] == '¶':
                thisBook.addLine('p', '')
                vText = vText[1:].lstrip()

            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Don't lose a metadata block that runs to the end of the file
    if metadataName and metadataContents:
        settingsDict[metadataName] = metadataContents

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata('Forge4SS')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
class OpenSongXMLBible(Bible):
    """
    Class for reading, validating, and converting OpenSong Bible XML.

    The expected document is a 'bible' element containing 'b' (book)
    elements, which contain 'c' (chapter) elements, which contain
    'v' (verse) elements.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8') -> None:
        """
        Constructor: just sets up the XML Bible file converter object.

        sourceFolder and givenName are joined to give the path of the XML
        file. Nothing is parsed here -- an unreadable file is only reported.
        """
        # Setup and initialise the base class first
        dPrint('Quiet', debuggingThisModule,
               "OpenSongXMLBible( {}, {}, {} )".format(sourceFolder, givenName, encoding))
        Bible.__init__(self)
        self.objectNameString = 'OpenSong XML Bible object'
        self.objectTypeString = 'OpenSong'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint('Quiet', debuggingThisModule,
                   "OpenSongXMLBible: File {!r} is unreadable".format(self.sourceFilepath))

        self.name = self.givenName
    # end of OpenSongXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath, checks the root element and hands each
        book element to __validateAndExtractBook(), then calls
        self.doPostLoadProcessing().
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Fail here if we didn't load anything at all
            #   (tested explicitly because the truth value of an Element is deprecated)
            assert self.XMLTree is not None and len(self.XMLTree) > 0

        # Find the main (bible) container
        if self.XMLTree.tag == OpenSongXMLBible.treeTag:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # The "n" and "sn" attributes are accepted but their values are not used
            for attrib, value in self.XMLTree.items():
                if attrib not in ('n', 'sn'):
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in main element".format(attrib, value))

            # Find the submain (book) containers
            for element in self.XMLTree:
                if element.tag == OpenSongXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                    self.__validateAndExtractBook(element)
                elif element.tag in ('OT', 'NT'):
                    pass  # These elements are accepted but not processed
                else:
                    logging.error("Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                OpenSongXMLBible.treeTag, self.XMLTree.tag))

        self.doPostLoadProcessing()
    # end of OpenSongXMLBible.load

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book is identified from its "n" attribute, is given id, h and
        mt1 lines, and is stashed once all of its chapters are processed.
        A missing or unrecognised book name is logged and the book skipped.
        """
        global BibleBooksNames

        vPrint('Verbose', debuggingThisModule, _("Validating OpenSong XML book…"))

        # Process the div attributes first
        bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(attrib, value))

        if not bookName:
            logging.error(_("OpenSong load can't find a book name"))
            return

        BBB = self.genericBOS.getBBBFromText(bookName)  # Booknames are usually in English
        if not BBB:  # wasn't English
            if BibleBooksNames is None:  # Only loaded when first needed
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            BBB = BibleBooksNames.getBBBFromText(bookName)  # Try non-English booknames
        if not BBB:
            logging.error(
                _("OpenSong load doesn't recognize book name: {!r}").format(bookName))
            return

        vPrint('Info', debuggingThisModule, _("Validating {} {}…").format(BBB, bookName))
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'OpenSong XML Bible Book object'
        thisBook.objectTypeString = 'OpenSong'

        USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(BBB)
        if not USFMAbbreviation:
            logging.critical(f"Unable to find USFM abbreviation for '{BBB}'")
            # NOTE(review): 'halt' is not defined in the visible code --
            #   presumably a deliberate stop-here; confirm
            if BibleOrgSysGlobals.strictCheckingFlag:
                halt
            USFMAbbreviation = 'XXA'  # Fallback code
        thisBook.addLine('id', '{} imported by {}'.format(
            USFMAbbreviation.upper(), programNameVersion))
        thisBook.addLine('h', bookName)
        thisBook.addLine('mt1', bookName)

        for element in book:
            if element.tag == OpenSongXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.chapterTag, element.tag))

        vPrint('Info', debuggingThisModule, " Saving {} into results…".format(BBB))
        self.stashBook(thisBook)
    # end of OpenSongXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        A 'c' line and then the verse lines are added to thisBook. Verse text
        containing newlines is added as q1 lines. Verses without an "n"
        attribute are logged and skipped.
        """
        vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

        # Process the div attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                pass  # Accepted but the value is not used
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".format(attrib, value))
        if chapterNumber:
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error("Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
            verseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib == "t":
                    # NOTE(review): presumably the last verse of a verse range --
                    #   accepted but not used; confirm whether a bridge is wanted
                    pass
                else:
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in verse element".format(attrib, value))
            if BibleOrgSysGlobals.debugFlag:
                assert verseNumber
            if verseNumber is None:
                # Without a number the verse line can't be built
                logging.error("Missing 'n' attribute in verse element for {} {}".format(
                    BBB, chapterNumber))
                continue

            vText = element.text if element.text else ''
            for subelement in element:
                sub2location = "{} in {}".format(subelement.tag, sublocation)
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sub2location, 'ks03')
                BibleOrgSysGlobals.checkXMLNoSubelements(subelement, sub2location, 'ks05')
                # text and tail are None when absent, which must not be
                #   formatted into the verse as the word 'None'
                if subelement.tag == 'i':
                    vText += '\\it {}\\it*{}'.format(
                        subelement.text or '', subelement.tail or '')
                else:
                    logging.error(
                        "Expected to find 'i' but got {!r}".format(subelement.tag))
                    # The tail follows the unknown element so it's still verse text
                    vText += subelement.tail or ''
            # NOTE(review): the verse's own tail is appended too -- in an indented
            #   file that tail would be whitespace including a newline, which the
            #   poetry test below would then see; confirm this is intended
            vText += element.tail if element.tail else ''

            if not vText:
                logging.warning("{} {}:{} has no text".format(BBB, chapterNumber, verseNumber))
                continue

            # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:  # This is how they represent poetry
                for j, textBit in enumerate(vText.split('\n')):
                    if j == 0:
                        thisBook.addLine('q1', '')
                        thisBook.addLine('v', verseNumber + ' ' + textBit)
                    else:
                        thisBook.addLine('q1', textBit)
            else:  # Just one verse line
                thisBook.addLine('v', verseNumber + ' ' + vText)
    # end of OpenSongXMLBible.__validateAndExtractChapter