class ZefaniaXMLBible(Bible):
    """
    Class for reading, validating, and converting ZefaniaXMLBible XML.

    Construct the object with the folder and filename of a Zefania XML
    file and then call load(). load() parses the file, extracts the
    optional INFORMATION header into attributes of this object, and saves
    one BibleBook (via self.saveBook) for each BIBLEBOOK element whose
    book code can be determined.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'XMLBIBLE'
    infoTag = 'INFORMATION'
    bookTag = 'BIBLEBOOK'
    chapterTag = 'CHAPTER'
    captionTag = 'CAPTION'
    verseTag = 'VERS'
    noteTag = 'NOTE'
    styleTag = 'STYLE'
    breakTag = 'BR'

    # Attributes of the main element that get copied onto this object
    #   (XML attribute name -> name of the attribute set on self)
    _storedMainAttributes = {
        'biblename': 'name',
        'status': 'status',
        'revision': 'revision',
        'version': 'version',
    }
    # Attributes of the main element that are recognised but not stored
    #   (In italian.xml, lgid is set to "german")
    _ignoredMainAttributes = (
        XMLNameSpace + 'noNamespaceSchemaLocation',
        'lgid',
        'type',
    )

    # Header fields that are expected to contain text (asserted in debug
    #   mode) and are always stored (header tag -> attribute set on self)
    _requiredHeaderFields = {
        'title': 'title',
        'description': 'description',
        'date': 'date',
        'identifier': 'identifier',
        'source': 'source',
        'language': 'language',
    }
    # Header fields that are only stored if they contain text
    _optionalHeaderFields = {
        'creator': 'creator',
        'subject': 'subject',
        'publisher': 'publisher',
        'contributors': 'contributors',
        'type': 'documentType',
        'coverage': 'coverage',
        'rights': 'rights',
    }
    expectedFormatText = 'Zefania XML Bible Markup Language'

    # STYLE css attribute values (compared exactly) and the USFM character
    #   marker that each one is converted to
    _cssToSFM = {
        "font-style:italic": '\\it',
        "font-style:italic;font-weight:bold": '\\bdit',
        "color:#FF0000": '\\em',
        "font-size: x-small; color:#8B8378": '\\add',
    }

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the Zefania Bible object.

        sourceFolder and givenName are joined to give the path of the XML
        file. encoding is stored on the object. Nothing is parsed here;
        an unreadable file is only reported with a printed message.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "Zefania XML Bible object"
        self.objectTypeString = "Zefania"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.tree = self.header = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            print("ZefaniaXMLBible: File '{}' is unreadable".format(
                self.sourceFilepath))

        self.name = self.givenName
    # end of ZefaniaXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        The INFORMATION header may be either the first or the last child
        of the main element; it is removed from the tree before the books
        are processed. self.doPostLoadProcessing() is always called at the
        end, even if the main element was not the expected one.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))
        self.tree = ElementTree().parse(self.sourceFilepath)
        if Globals.debugFlag:
            assert (len(self.tree))  # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == ZefaniaXMLBible.treeTag:
            location = "Zefania XML file"
            Globals.checkXMLNoText(self.tree, location, '4f6h')
            Globals.checkXMLNoTail(self.tree, location, '1wk8')

            for attrib, value in self.tree.items():
                if attrib in self._storedMainAttributes:
                    if value:  # Empty values don't overwrite our defaults
                        setattr(self, self._storedMainAttributes[attrib], value)
                elif attrib not in self._ignoredMainAttributes:
                    logging.warning(
                        "Unprocessed '{}' attribute ({}) in main element".
                        format(attrib, value))

            # Look for the information record at the start of the file
            #   or else at the END of the file
            # (An empty main element has no children to index)
            headerElement = None
            if len(self.tree):
                firstElement = self.tree[0]
                lastElement = self.tree[len(self.tree) - 1]
                if firstElement.tag == ZefaniaXMLBible.infoTag:
                    headerElement = firstElement
                elif lastElement.tag == ZefaniaXMLBible.infoTag:
                    headerElement = lastElement
            if headerElement is not None:
                self.header = headerElement
                self.tree.remove(headerElement)
                self.__validateAndExtractHeader()

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == ZefaniaXMLBible.bookTag:
                    sublocation = "book in " + location
                    Globals.checkXMLNoText(element, sublocation, 'g3g5')
                    Globals.checkXMLNoTail(element, sublocation, 'd3f6')
                    self.__validateAndExtractBook(element)
                else:
                    logging.error("Expected to find '{}' but got '{}'".format(
                        ZefaniaXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load '{}' but got '{}'".format(
                ZefaniaXMLBible.treeTag, self.tree.tag))
        self.doPostLoadProcessing()
    # end of ZefaniaXMLBible.load

    def __validateAndExtractHeader(self):
        """
        Extracts information out of the header record, such as:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. ...</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Zefania XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
            </INFORMATION>

        Each recognised field is stored as an attribute of this object
        (the 'type' field is stored as self.documentType). The 'format'
        field is only checked (in debug mode) and is not stored.
        Unrecognised tags are logged as errors.
        """
        if Globals.debugFlag:
            # The header must exist and must contain some fields
            assert (self.header is not None and len(self.header))
        location = 'Header'
        Globals.checkXMLNoAttributes(self.header, location, 'j4j6')
        Globals.checkXMLNoText(self.header, location, 'sk4l')
        Globals.checkXMLNoTail(self.header, location, 'a2d4')

        # TODO: We probably need to rationalise some of the self.xxx stores
        for element in self.header:
            tag = element.tag
            isRequired = tag in self._requiredHeaderFields
            isOptional = tag in self._optionalHeaderFields
            if not (isRequired or isOptional or tag == 'format'):
                logging.error("Found unexpected '{}' tag in {}".format(
                    tag, location))
                continue

            sublocation = "{} in {}".format(tag, location)
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            Globals.checkXMLNoAttributes(element, sublocation, 'j3jd')
            Globals.checkXMLNoSubelements(element, sublocation, '5g78')

            if isRequired:
                if Globals.debugFlag:
                    assert (element.text)
                setattr(self, self._requiredHeaderFields[tag], element.text)
            elif isOptional:
                if element.text:
                    setattr(self, self._optionalHeaderFields[tag], element.text)
            else:  # It's the format field -- just check it
                if Globals.debugFlag:
                    assert (element.text)
                    assert (element.text == ZefaniaXMLBible.expectedFormatText)
    # end of ZefaniaXMLBible.__validateAndExtractHeader

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book code is looked up from the bnumber attribute if there is
        one, otherwise from the bname attribute. Books that can't be
        identified are logged and ignored.
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating XML book..."))

        # Process the book attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib, value in book.items():
            if attrib == "bnumber":
                bookNumber = value
            elif attrib == "bname":
                bookName = value
            elif attrib == "bsname":
                bookShortName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))

        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning(
                    "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                    .format(bookNumber, bookName, bookShortName))
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)

        if not BBB:
            if not bookNumber:  # Otherwise it was already reported above
                logging.error(
                    "Unable to identify book with name={}, shortName={} -- ignoring it"
                    .format(bookName, bookShortName))
            return

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "Zefania XML Bible Book object"
        thisBook.objectTypeString = "Zefania"
        for element in book:
            if element.tag == ZefaniaXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                Globals.checkXMLNoText(element, sublocation, 'j3jd')
                Globals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.chapterTag, element.tag))
        if Globals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)
    # end of ZefaniaXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        A CAPTION element (used in Psalms) is saved as verse zero.
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating XML chapter..."))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "cnumber":
                chapterNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            thisBook.appendLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'cnumber' attribute in chapter element for {}".format(
                    BBB))

        for element in chapter:
            if element.tag == ZefaniaXMLBible.verseTag:
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook,
                                               element)
            elif element.tag == ZefaniaXMLBible.captionTag:  # Used in Psalms
                location = "caption in {} {}".format(BBB, chapterNumber)
                Globals.checkXMLNoTail(element, location, 'k5k8')
                Globals.checkXMLNoSubelements(element, location, 'd3f5')

                # Handle caption attributes
                vRef = None
                for attrib, value in element.items():
                    if attrib == "vref":
                        vRef = value
                        if Globals.debugFlag:
                            assert (vRef == '1')
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in caption element"
                            .format(attrib, value))
                if Globals.debugFlag:
                    assert (vRef)

                captionText = element.text
                if captionText:  # This is the main text of the caption
                    thisBook.appendLine(
                        'v', '0' + ' ' + captionText)  # We save it as verse zero
                else:
                    logging.warning("{} {}:{} has no text".format(
                        BBB, chapterNumber, vRef))
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.verseTag, element.tag))
    # end of ZefaniaXMLBible.__validateAndExtractChapter

    def __convertStyle(self, styleElement, location, checkCode, description):
        """
        Convert the given STYLE element into USFM character markup.

        location and checkCode are passed on to the XML checking function.
        description (e.g., "style subelement") is only used in the warning
        about unprocessed attributes.

        Returns a string containing the marked-up text of the element,
        followed by the stripped tail text (if any) of the element.

        Unrecognised styles are logged as errors and marked up with \\sc.
        """
        Globals.checkXMLNoSubelements(styleElement, location, checkCode)

        css = idStyle = None
        for attrib, value in styleElement.items():
            if attrib == "css":
                css = value
            elif attrib == "id":
                idStyle = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in {}".format(
                        attrib, value, description))
        if Globals.debugFlag:
            assert (css or idStyle)

        SFM = self._cssToSFM.get(css)
        if SFM is None and css is None and idStyle == 'cl:divineName':
            SFM = '\\nd'

        # An empty STYLE element has None for its text
        sText = (styleElement.text or '').strip()
        if Globals.debugFlag:
            assert (sText)

        if SFM:
            result = SFM + ' ' + sText + SFM + '*'
        else:  # Use sc for unknown styles
            logging.error("Unrecognised style (css={!r}, id={!r}) in {}".format(
                css, idStyle, location))
            # css can be None here if only an (unrecognised) id was given
            result = '\\sc ' + '[' + (css or idStyle or '') + ']' + sText + '\\sc* '

        if styleElement.tail:
            result += styleElement.tail.strip()
        return result
    # end of ZefaniaXMLBible.__convertStyle

    def __validateAndExtractNote(self, BBB, chapterNumber, verseNumber,
                                 thisBook, note, location):
        """
        Check/validate the given NOTE element from inside a verse.

        The tail text of the note (if any) is saved straight away as a
        'v~' line. The text of the note itself is currently discarded.

        Returns the marked-up text from any STYLE elements inside the note
        (possibly an empty string) for appending to the verse text.
        """
        sublocation = "note in " + location

        noteType = None
        for attrib, value in note.items():
            if attrib == "type":
                noteType = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in note subelement"
                    .format(attrib, value))
        if noteType not in ('n-studynote', 'x-studynote'):
            logging.warning("Unexpected {} note type in {}".format(
                noteType, BBB))
        if Globals.debugFlag:
            assert (noteType)

        # NOTE: note.text is not used -- we are losing data here (for now)
        nTail = note.tail
        if nTail:
            if '\n' in nTail:
                print(
                    "ZefaniaXMLBible.__validateAndExtractVerse: nTail {} {}:{} '{}'"
                    .format(BBB, chapterNumber, verseNumber, nTail))
                nTail = nTail.replace('\n', ' ')
            thisBook.appendLine('v~', nTail)

        styledText = ''
        for subelement in note:
            if subelement.tag == ZefaniaXMLBible.styleTag:
                styledText += self.__convertStyle(
                    subelement, "style in " + sublocation, 'fyt4',
                    "style subsubelement")
            else:
                logging.error(
                    "Expected to find {} but got '{}' in {}".format(
                        ZefaniaXMLBible.styleTag, subelement.tag,
                        sublocation))
        return styledText
    # end of ZefaniaXMLBible.__validateAndExtractNote

    def __validateAndExtractVerse(self, BBB, chapterNumber, thisBook, verse):
        """
        Check/validate and extract verse data from the given XML verse
            record, handling any note, style, and line break subelements,
            and saving the verse text into thisBook.

        A line break (BR) subelement causes the text collected so far to be
        saved as a 'v' line, followed by an 'm' line containing the tail
        text of the line break.
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating XML verse..."))

        location = "verse in {} {}".format(BBB, chapterNumber)
        Globals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "vnumber":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in verse element".format(
                        attrib, value))
        if Globals.debugFlag:
            assert (verseNumber)
        location = "{}:{}".format(
            location, verseNumber)  # Get a better location description

        # The text is None if a verse starts immediately with a style or
        #   note, so start with an empty string that we can append to
        vText = verse.text.strip() if verse.text else ''

        # Handle verse subelements (notes and styled portions)
        for subelement in verse:
            if subelement.tag == ZefaniaXMLBible.noteTag:
                vText += self.__validateAndExtractNote(
                    BBB, chapterNumber, verseNumber, thisBook, subelement,
                    location)
            elif subelement.tag == ZefaniaXMLBible.styleTag:
                vText += self.__convertStyle(
                    subelement, "style in " + location, 'f5gh',
                    "style subelement")
            elif subelement.tag == ZefaniaXMLBible.breakTag:
                sublocation = "line break in " + location
                Globals.checkXMLNoText(subelement, sublocation, 'c1d4')
                Globals.checkXMLNoSubelements(subelement, sublocation, 'g4g8')
                art = None
                for attrib, value in subelement.items():
                    if attrib == "art":
                        art = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in line break subelement"
                            .format(attrib, value))
                if Globals.debugFlag:
                    assert (art == 'x-nl')
                if vText:  # Save what we have so far before the break
                    thisBook.appendLine('v', verseNumber + ' ' + vText)
                    vText = ''
                thisBook.appendLine(
                    'm', subelement.tail.strip() if subelement.tail else '')
            else:
                logging.error(
                    "Expected to find NOTE or STYLE but got '{}' in {}".format(
                        subelement.tag, location))

        if vText:  # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print(
                    "ZefaniaXMLBible.__validateAndExtractVerse: vText {} {}:{} '{}'"
                    .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.appendLine('v', verseNumber + ' ' + vText)
    # end of ZefaniaXMLBible.__validateAndExtractVerse
# end of ZefaniaXMLBible class
class OpenSongXMLBible( Bible ):
    """
    Class for reading, validating, and converting OpenSong Bible XML.

    Construct the object with the folder and filename of an OpenSong XML
    file and then call load(). load() parses the file and saves one
    BibleBook (via self.saveBook) for each book element whose name is
    recognised.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'


    def __init__( self, sourceFolder, givenName, encoding='utf-8' ):
        """
        Constructor: just sets up the XML Bible file converter object.

        sourceFolder and givenName are joined to give the path of the XML
        file. encoding is stored on the object. Nothing is parsed here;
        an unreadable file is only reported with a printed message.
        """
        # Setup and initialise the base class first
        if BibleOrgSysGlobals.debugFlag: print( "OpenSongXMLBible( {}, {}, {} )".format( sourceFolder, givenName, encoding ) )
        Bible.__init__( self )
        self.objectNameString = "OpenSong XML Bible object"
        self.objectTypeString = "OpenSong"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join( self.sourceFolder, self.givenName )

        self.tree = None # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Do a preliminary check on the readability of our file
        if not os.access( self.sourceFilepath, os.R_OK ):
            print( "OpenSongXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) )

        self.name = self.givenName
    # end of OpenSongXMLBible.__init__


    def load( self ):
        """
        Load a single source XML file and load book elements.

        OT and NT elements directly inside the main element are skipped
        without being processed. self.doPostLoadProcessing() is always
        called at the end, even if the main element was not the expected
        one.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        self.tree = ElementTree().parse( self.sourceFilepath )
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == OpenSongXMLBible.treeTag:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            # The n and sn attributes are recognised but are not stored
            for attrib,value in self.tree.items():
                if attrib not in ( "n", "sn" ):
                    logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == OpenSongXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                elif element.tag in ( 'OT', 'NT' ):
                    # NOTE(review): the contents of these elements are not processed -- confirm that's intended
                    pass
                else: logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.bookTag, element.tag ) )
        else: logging.error( "Expected to load {!r} but got {!r}".format( OpenSongXMLBible.treeTag, self.tree.tag ) )
        self.doPostLoadProcessing()
    # end of OpenSongXMLBible.load


    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book code is looked up from the n (name) attribute. Books
        without a name, or with a name that isn't recognised, are logged
        as errors and ignored. Each saved book starts with id, h, and mt1
        lines made from the book code and book name.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book...") )

        # Process the book attributes first
        bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        if not bookName:
            logging.error( _("OpenSong load can't find a book name") )
            return

        BBB = self.genericBOS.getBBB( bookName ) # Booknames are in English
        if not BBB:
            logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
            return

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"
        USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        thisBook.addLine( 'id', '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion ) )
        thisBook.addLine( 'h', bookName )
        thisBook.addLine( 'mt1', bookName )
        for element in book:
            if element.tag == OpenSongXMLBible.chapterTag:
                sublocation = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, element )
            else: logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, element.tag ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
        self.saveBook( thisBook )
    # end of OpenSongXMLBible.__validateAndExtractBook


    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        Verse text containing newlines is saved as a series of q1 lines
        (one per line of text) with the verse number on the first one.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="n":
                chapterNumber = value
            elif attrib=="VERSES":
                pass # Recognised, but the value is not used
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            chapterNumber = chapterNumber.replace( 'of Solomon ', '' ) # Fix a mistake in the Chinese_SU module
            thisBook.addLine( 'c', chapterNumber )
        else: logging.error( "Missing 'n' attribute in chapter element for {}".format( BBB ) )

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.verseTag, element.tag ) )
                continue

            sublocation = "verse in {} {}".format( BBB, chapterNumber )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'l5ks' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5f7h' )

            verseNumber = None
            for attrib,value in element.items():
                if attrib=="n":
                    verseNumber = value
                elif attrib=="t":
                    pass # Recognised, but the value is not used
                else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
            if BibleOrgSysGlobals.debugFlag: assert( verseNumber )

            vText = element.text
            if not vText:
                logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )
            elif '\n' in vText: # This is how they represent poetry
                for j, textBit in enumerate( vText.split( '\n' ) ):
                    if j==0:
                        thisBook.addLine( 'q1', '' )
                        thisBook.addLine( 'v', verseNumber + ' ' + textBit )
                    else: thisBook.addLine( 'q1', textBit )
            else: # Just one verse line
                thisBook.addLine( 'v', verseNumber + ' ' + vText )
    # end of OpenSongXMLBible.__validateAndExtractChapter
# end of OpenSongXMLBible class
class HaggaiXMLBible( Bible ): """ Class for reading, validating, and converting HaggaiXMLBible XML. """ XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}" treeTag = 'XMLBIBLE' infoTag = 'INFORMATION' bookTag = 'BIBLEBOOK' chapterTag = 'CHAPTER' captionTag = 'CAPTION' paragraphTag = 'PARAGRAPH' verseTag = 'VERSE' noteTag = 'NOTE' styleTag = 'STYLE' breakTag = 'BR' def __init__( self, sourceFolder, givenName, encoding='utf-8' ): """ Constructor: just sets up the Haggai Bible object. """ # Setup and initialise the base class first Bible.__init__( self ) self.objectNameString = "Haggai XML Bible object" self.objectTypeString = "Haggai" # Now we can set our object variables self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding self.sourceFilepath = os.path.join( self.sourceFolder, self.givenName ) self.tree = self.header = None # Will hold the XML data # Get the data tables that we need for proper checking #self.ISOLanguages = ISO_639_3_Languages().loadData() self.genericBOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" ) # Do a preliminary check on the readability of our file if not os.access( self.sourceFilepath, os.R_OK ): print( "HaggaiXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) ) self.name = self.givenName #if self.name is None: #pass # end of HaggaiXMLBible.__init__ def load( self ): """ Load a single source XML file and load book elements. 
""" if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) ) self.tree = ElementTree().parse( self.sourceFilepath ) if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all # Find the main (bible) container if self.tree.tag == HaggaiXMLBible.treeTag: location = "Haggai XML file" BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' ) BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' ) schema = name = status = BibleType = revision = version = lgid = None for attrib,value in self.tree.items(): if attrib == HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation': schema = value elif attrib == "biblename": name = value elif attrib == "lgid": lgid = value # In italian.xml this is set to "german" elif attrib == "status": status = value elif attrib == "type": BibleType = value elif attrib == "revision": revision = value elif attrib == "version": version = value else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) ) if name: self.name = name if status: self.status = status if revision: self.revision = revision if version: self.version = version if self.tree[0].tag == 'INFORMATION': self.header = self.tree[0] self.tree.remove( self.header ) self.__validateAndExtractHeader() else: # Handle information records at the END of the file ix = len(self.tree) - 1 if self.tree[ix].tag == 'INFORMATION': self.header = self.tree[ix] self.tree.remove( self.header ) self.__validateAndExtractHeader() # Find the submain (book) containers for element in self.tree: if element.tag == HaggaiXMLBible.bookTag: sublocation = "book in " + location BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' ) self.__validateAndExtractBook( element ) else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) ) else: 
logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) ) self.doPostLoadProcessing() # end of HaggaiXMLBible.load def __validateAndExtractHeader( self ): """ Extracts information out of the header record, such as: <INFORMATION> <title>King James Version</title> <creator></creator> <subject>The Holy Bible</subject> <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description> <publisher>FREE BIBLE SOFTWARE GROUP</publisher> <contributors /> <date>2009-01-23</date> <type>Bible</type> <format>Haggai XML Bible Markup Language</format> <identifier>kjv</identifier> <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source> <language>ENG</language> <coverage>provide the Bible to the nations of the world</coverage> <rights>We believe that this Bible is found in the Public Domain.</rights> </INFORMATION> """ if BibleOrgSysGlobals.debugFlag: assert( self.header ) location = 'Header' BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' ) BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' ) BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' ) # TODO: We probably need to rationalise some of the self.xxx stores for element in self.header: #print( "header", element.tag ) if element.tag == 'title': sublocation = "title in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if 
BibleOrgSysGlobals.debugFlag: assert( element.text ) self.title = element.text elif element.tag == 'creator': sublocation = "creator in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.creator = element.text elif element.tag == 'subject': sublocation = "subject in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.subject = element.text elif element.tag == 'description': sublocation = "description in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) self.description = element.text elif element.tag == 'publisher': sublocation = "publisher in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.publisher = element.text elif element.tag == 'contributor': sublocation = "contributor in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'alj1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jjd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5gk78' ) if element.text: try: self.contributor = [ self.contributor, element.text ] # Put multiples into a list except AttributeError: self.contributor = element.text # Must be the first (and possibly only) one elif 
element.tag == 'contributors': sublocation = "contributors in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.contributors = element.text elif element.tag == 'date': sublocation = "date in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) self.date = element.text elif element.tag == 'type': sublocation = "type in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.documentType = element.text elif element.tag == 'format': sublocation = "format in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) if BibleOrgSysGlobals.debugFlag: assert( element.text == 'Haggai XML Bible Markup Language' ) elif element.tag == 'identifier': sublocation = "identifier in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) self.identifier = element.text elif element.tag == 'source': sublocation = "source in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, 
sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) self.source = element.text elif element.tag == 'language': sublocation = "language in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert( element.text ) self.language = element.text elif element.tag == 'coverage': sublocation = "coverage in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.coverage = element.text elif element.tag == 'rights': sublocation = "rights in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.rights = element.text else: logging.error( "Found unexpected {!r} tag in {}".format( element.tag, location ) ) # end of HaggaiXMLBible.__validateAndExtractHeader def __validateAndExtractBook( self, book ): """ Check/validate and extract book data from the given XML book record finding chapter subelements. 
""" if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book...") ) # Process the div attributes first BBB = bookName = bookShortName = bookNumber = None for attrib,value in book.items(): if attrib=="bnumber": bookNumber = value elif attrib=="bname": bookName = value elif attrib=="bsname": bookShortName = value else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) ) if bookNumber: try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber ) except KeyError: logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \ .format( bookNumber, bookName, bookShortName ) ) elif bookName: BBB = self.genericBOS.getBBB( bookName ) if BBB: if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) ) thisBook = BibleBook( self, BBB ) thisBook.objectNameString = "Haggai XML Bible Book object" thisBook.objectTypeString = "Haggai" #thisBook.sourceFilepath = self.sourceFilepath for element in book: if element.tag == HaggaiXMLBible.captionTag: sublocation = "caption in {}".format( BBB ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'jhl6' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'jk21' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'kjh6' ) thisBook.addLine( 'mt', element.text ) elif element.tag == HaggaiXMLBible.chapterTag: sublocation = "chapter in {}".format( BBB ) BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) self.__validateAndExtractChapter( BBB, thisBook, element ) else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, element.tag ) ) if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results...".format( BBB ) ) self.saveBook( thisBook ) # end of HaggaiXMLBible.__validateAndExtractBook def __validateAndExtractChapter( self, BBB, 
thisBook, chapter ): """ Check/validate and extract chapter data from the given XML book record finding and saving chapter numbers and finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") ) # Process the chapter attributes first chapterNumber = numVerses = None for attrib,value in chapter.items(): if attrib=="cnumber": chapterNumber = value else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) ) if chapterNumber: #print( BBB, 'c', chapterNumber ) thisBook.addLine( 'c', chapterNumber ) else: logging.error( "Missing 'n' attribute in chapter element for BBB".format( BBB ) ) for element in chapter: if element.tag == HaggaiXMLBible.paragraphTag: location = "paragraph in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.verseTag+'disabled': location = "verse in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms location = "caption in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' ) # Handle caption attributes vRef = None for attrib,value in element.items(): if attrib=="vref": vRef = value if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' ) else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( vRef ) vText = element.text if not vText: logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) ) if vText: # This is the main text of the caption #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero else: logging.error( 
"Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) ) # end of HaggaiXMLBible.__validateAndExtractChapter def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ): """ Check/validate and extract paragraph data from the given XML book record finding and saving paragraphs and finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph...") ) location = "paragraph in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoAttributes( paragraph, location, 'brgw3' ) BibleOrgSysGlobals.checkXMLNoText( paragraph, location, 'brgw3' ) BibleOrgSysGlobals.checkXMLNoTail( paragraph, location, 'brgw3' ) thisBook.addLine( 'p', '' ) # Handle verse subelements (verses) for element in paragraph: if element.tag == HaggaiXMLBible.verseTag: location = "verse in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms location = "caption in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' ) # Handle caption attributes vRef = None for attrib,value in element.items(): if attrib=="vref": vRef = value if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' ) else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( vRef ) vText = element.text if not vText: logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) ) if vText: # This is the main text of the caption #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) ) # end of 
HaggaiXMLBible.__validateAndExtractParagraph def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ): """ Check/validate and extract verse data from the given XML book record finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse...") ) location = "verse in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' ) # Handle verse attributes verseNumber = toVerseNumber = None for attrib,value in verse.items(): if attrib=="vnumber": verseNumber = value else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( verseNumber ) location = "{}:{}".format( location, verseNumber ) # Get a better location description #thisBook.addLine( 'v', verseNumber ) vText = '' if verse.text is None else verse.text if vText: vText = vText.strip() #if not vText: # This happens if a verse starts immediately with a style or note #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) ) # Handle verse subelements (notes and styled portions) for subelement in verse: if subelement.tag == HaggaiXMLBible.noteTag: sublocation = "note in " + location noteType = None for attrib,value in subelement.items(): if attrib=="type": noteType = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if noteType and noteType not in ('variant',): logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) ) nText, nTail = subelement.text, subelement.tail #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) ) vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText ) if nTail: if '\n' in nTail: print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) ) nTail = nTail.replace( 
'\n', ' ' ) vText += nTail for subsubelement in subelement: if subsubelement.tag == HaggaiXMLBible.styleTag: subsublocation = "style in " + sublocation BibleOrgSysGlobals.checkXMLNoSubelements( subsubelement, subsublocation, 'fyt4' ) fs = css = idStyle = None for attrib,value in subsubelement.items(): if attrib=='fs': fs = value #elif attrib=="css": css = value #elif attrib=="id": idStyle = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subsubelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( fs or css or idStyle ) SFM = None if fs == 'italic': SFM = '\\it' elif fs == 'super': SFM = '\\bdit' elif fs == 'emphasis': SFM = '\\em' else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt #if css == "font-style:italic": SFM = '\\it' #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' #elif css == "color:#FF0000": SFM = '\\em' #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' #elif css is None and idStyle=='cl:divineName': SFM = '\\nd' #else: print( "css is", css, "idStyle is", idStyle ); halt sText, sTail = subsubelement.text.strip(), subsubelement.tail if BibleOrgSysGlobals.debugFlag: assert( sText ) if SFM: vText += SFM+' ' + sText + SFM+'*' else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) ) elif subelement.tag == HaggaiXMLBible.styleTag: sublocation = "style in " + location BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' ) fs = css = idStyle = None for attrib,value in subelement.items(): if attrib=="fs": fs = value #elif attrib=="css": css = value #elif attrib=="id": idStyle = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( fs ) SFM = None if fs == 'super': SFM 
= '\\bdit' elif fs == 'emphasis': SFM = '\\em' else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt #if css == "font-style:italic": SFM = '\\it' #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' #elif css == "color:#FF0000": SFM = '\\em' #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' #elif css is None and idStyle=='cl:divineName': SFM = '\\nd' #else: print( "css is", css, "idStyle is", idStyle ); halt sText, sTail = subelement.text.strip(), subelement.tail if BibleOrgSysGlobals.debugFlag: assert( sText ) #print( BBB, chapterNumber, sublocation ) if SFM: vText += SFM+' ' + sText + SFM+'*' else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() elif subelement.tag == HaggaiXMLBible.breakTag: sublocation = "line break in " + location BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' ) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' ) art = None for attrib,value in subelement.items(): if attrib=="art": art = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert( art == 'x-nl' ) #print( BBB, chapterNumber, verseNumber ) #assert( vText ) if vText: thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None vText = '' thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' ) #bTail = subelement.tail #if bTail: vText = bTail.strip() else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) ) if vText: # This is the main text of the verse (follows the verse milestone) if '\n' in vText: print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) vText = vText.replace( '\n', ' ' ) thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
class OpenSongXMLBible(Bible):
    """
    Class for reading, validating, and converting OpenSong Bible XML.

    The expected structure is a root <bible> element containing <b> (book)
    elements, which contain <c> (chapter) elements, which contain <v> (verse)
    elements. Books, chapters and verses are identified by their 'n' attribute.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the XML Bible file converter object.

        sourceFolder and givenName are joined to give the path of the XML file.
        The file is not parsed here (see load) -- only its readability is
        checked, and a message is printed if it cannot be read.
        """
        # Setup and initialise the base class first
        if Globals.debugFlag:
            print("OpenSongXMLBible( {}, {}, {} )".format(
                sourceFolder, givenName, encoding))
        Bible.__init__(self)
        self.objectNameString = "OpenSong XML Bible object"
        self.objectTypeString = "OpenSong"

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.tree = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        #self.ISOLanguages = ISO_639_3_Languages().loadData()
        self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            print("OpenSongXMLBible: File '{}' is unreadable".format(
                self.sourceFilepath))

        self.name = self.givenName
    # end of OpenSongXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath into self.tree and extracts every <b>
        element found directly under the root. <OT> and <NT> elements are
        silently ignored; anything else is logged as an error.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))
        self.tree = ElementTree().parse(self.sourceFilepath)
        if Globals.debugFlag:
            assert (len(self.tree)
                    )  # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag != OpenSongXMLBible.treeTag:
            logging.error("Expected to load '{}' but got '{}'".format(
                OpenSongXMLBible.treeTag, self.tree.tag))
            return

        location = "XML file"
        Globals.checkXMLNoText(self.tree, location, '4f6h')
        Globals.checkXMLNoTail(self.tree, location, '1wk8')

        # 'n' (name) and 'sn' (short name) are recognised but not used at present
        for attrib, value in self.tree.items():
            if attrib not in ("n", "sn"):
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in main element".format(
                        attrib, value))

        # Find the submain (book) containers
        for element in self.tree:
            if element.tag == OpenSongXMLBible.bookTag:
                sublocation = "book in " + location
                Globals.checkXMLNoText(element, sublocation, 'g3g5')
                Globals.checkXMLNoTail(element, sublocation, 'd3f6')
                self.__validateAndExtractBook(element)
            elif element.tag in ('OT', 'NT'):
                pass
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.bookTag, element.tag))
    # end of OpenSongXMLBible.load

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book code (BBB) is looked up from the 'n' attribute. An error is
        logged (and nothing is saved) if the attribute is missing or if the
        name is not recognised.
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the div attributes first
        bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))

        # Only report a problem when there actually is one
        if not bookName:
            logging.error(_("OpenSong load can't find a book name"))
            return
        BBB = self.genericBOS.getBBB(bookName)
        if not BBB:
            logging.error(
                _("OpenSong load doesn't recognize book name: '{}'").format(
                    bookName))
            return

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"
        #thisBook.sourceFilepath = self.sourceFilepath
        for element in book:
            if element.tag == OpenSongXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                Globals.checkXMLNoText(element, sublocation, 'j3jd')
                Globals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.chapterTag, element.tag))
        if Globals.verbosityLevel > 2:
            print(" Saving {} into results...".format(BBB))
        self.saveBook(thisBook)
    # end of OpenSongXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        The chapter number ('n' attribute) is added as a 'c' line. Each verse
        is added as a 'v' line, except that verse text containing newlines is
        treated as poetry and split into 'q1' lines.
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating XML chapter..."))

        # Process the div attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                pass  # Recognised (number of verses) but not used at present
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            #print( BBB, 'c', chapterNumber )
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.appendLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            Globals.checkXMLNoTail(element, sublocation, 'l5ks')
            Globals.checkXMLNoSubelements(element, sublocation, '5f7h')
            verseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib == "t":
                    pass  # Recognised (the "to" verse number) but not used at present
                else:
                    logging.warning(
                        "Unprocessed '{}' attribute ({}) in verse element".
                        format(attrib, value))
            if Globals.debugFlag:
                assert (verseNumber)
            #thisBook.appendLine( 'v', verseNumber )

            vText = element.text
            if not vText:
                logging.warning("{} {}:{} has no text".format(
                    BBB, chapterNumber, verseNumber))
                continue

            # This is the main text of the verse (follows the verse milestone)
            #print( "{} {}:{} '{}'".format( BBB, chapterNumber, verseNumber, vText ) )
            if '\n' in vText:  # This is how they represent poetry
                #print( "vText", repr(vText), repr(element.text) )
                for j, textBit in enumerate(vText.split('\n')):
                    if j == 0:
                        # The first line carries the verse number, preceded by an empty q1 marker
                        thisBook.appendLine('q1', '')
                        thisBook.appendLine('v', verseNumber + ' ' + textBit)
                    else:
                        thisBook.appendLine('q1', textBit)
            else:  # Just one verse line
                thisBook.appendLine('v', verseNumber + ' ' + vText)
    # end of OpenSongXMLBible.__validateAndExtractChapter