def loadSystems(self, XMLFolder=None):
    """
    Load and pre-process the specified book order systems.

    Every file in XMLFolder whose name matches BIBLEBOOKORDER_<code>.xml
    (matched case-insensitively) is parsed. For each one, an entry is made
    in self._XMLSystems[<code>] holding the parsed 'tree' and, when a
    well-formed header is found, the detached 'header' element plus the
    'version', 'date' and 'title' texts taken from its work element.

    XMLFolder defaults to the 'BookOrders/' subfolder of
    BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.

    The load is only ever done once: if systems are already loaded, a
    request for a different folder is logged as an error and ignored.

    Returns self.
    """
    if self._XMLSystems:  # The data must have been already loaded -- only ever do this once
        if XMLFolder is not None and XMLFolder != self.__XMLFolder:
            # Report the folder that was just requested (and ignored), not the one already in use
            logging.error(
                _("Bible book order systems are already loaded -- your different folder of {!r} was ignored")
                .format(XMLFolder))
        return self

    if XMLFolder is None:
        XMLFolder = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath('BookOrders/')  # Relative to module, not cwd
    self.__XMLFolder = XMLFolder
    vPrint('Info', debuggingThisModule, _("Loading book order systems from {}…").format(self.__XMLFolder))

    filenamePrefix = "BIBLEBOOKORDER_"
    for filename in os.listdir(self.__XMLFolder):
        filepart, extension = os.path.splitext(filename)
        if extension.upper() != '.XML' or not filepart.upper().startswith(filenamePrefix):
            continue  # Not one of our book order files

        bookOrderSystemCode = filepart[len(filenamePrefix):]
        vPrint('Verbose', debuggingThisModule,
               _(" Loading{} book order system from {}…").format(bookOrderSystemCode, filename))
        # The (empty) entry is registered before parsing, so it exists even if parsing fails
        system = self._XMLSystems[bookOrderSystemCode] = {}
        tree = system['tree'] = ElementTree().parse(os.path.join(self.__XMLFolder, filename))
        # Explicit length test: relying on the truth value of an Element is deprecated
        assert len(tree) > 0  # Fail here if we didn't load anything at all

        # Check and remove the header element
        if tree.tag == self.XMLTreeTag:
            header = tree[0]
            if header.tag == self.headerTag:
                system['header'] = header
                tree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, 'header')
                BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
                BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(work, "work in header")
                    if work.tag == "work":
                        system['version'] = work.find('version').text
                        system['date'] = work.find('date').text
                        system['title'] = work.find('title').text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                logging.warning(_("Missing header element (looking for {!r} tag)").format(self.headerTag))
        else:
            logging.error(_("Expected to load {!r} but got {!r}").format(self.XMLTreeTag, tree.tag))

        # Whatever is left directly under the root (after any header removal) is counted as books
        bookCount = len(tree)
        vPrint('Info', debuggingThisModule,
               _(" Loaded {} books for {}").format(bookCount, bookOrderSystemCode))
        logging.info(_(" Loaded {} books for {}").format(bookCount, bookOrderSystemCode))

        if BibleOrgSysGlobals.strictCheckingFlag:
            self.__validateSystem(tree, bookOrderSystemCode)
    return self
def __validateAndExtractBook(self, book, bookNumber):
    """
    Validate one XML book element, extract its chapters, and stash the result.

    The book name is read from the "n" attribute (any other attribute is
    logged as unprocessed) and looked up through self.genericBOS, with a
    second attempt after removing accents. The code obtained from bookNumber
    always wins if the two disagree.

    If a book code is determined, a BibleBook is created, every chapter
    subelement is passed to self.__validateAndExtractChapter(), and the
    book is saved with self.stashBook(). Otherwise nothing is saved.
    """
    vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

    # Process the book attributes first -- only the name is expected
    bookName = None
    for attributeName, attributeValue in book.items():
        if attributeName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(attributeName, attributeValue))
            continue
        bookName = attributeValue

    # Try to get a book code from the name (retrying without accents if necessary)
    codeFromName = None
    if bookName:
        codeFromName = self.genericBOS.getBBBFromText(bookName)
        if codeFromName is None:
            unaccentedName = BibleOrgSysGlobals.removeAccents(bookName)
            if unaccentedName != bookName:
                codeFromName = self.genericBOS.getBBBFromText(unaccentedName)

    # Just double check using the book number -- the number takes precedence
    codeFromNumber = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(bookNumber)
    BBB = codeFromName
    if codeFromNumber != codeFromName:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            vPrint('Quiet', debuggingThisModule,
                   "Assuming that book {} {!r} is {} (not {})".format(
                       bookNumber, bookName, codeFromNumber, codeFromName))
        BBB = codeFromNumber

    if not BBB:
        return  # Couldn't work out which book this is, so there's nothing to save

    vPrint('Info', debuggingThisModule, _("Validating {} {}…").format(BBB, bookName))
    newBook = BibleBook(self, BBB)
    newBook.objectNameString = 'VerseView XML Bible Book object'
    newBook.objectTypeString = 'VerseView'
    expectedTag = VerseViewXMLBible.chapterTag
    for child in book:
        if child.tag != expectedTag:
            logging.error("vb26 Expected to find {!r} but got {!r}".format(
                VerseViewXMLBible.chapterTag, child.tag))
            continue
        chapterLocation = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, chapterLocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, chapterLocation, 'al1d')
        self.__validateAndExtractChapter(BBB, newBook, child)

    vPrint('Info', debuggingThisModule, " Saving {} into results…".format(BBB))
    self.stashBook(newBook)
def __load(self, XMLFileOrFilepath):
    """
    Load the source XML file and remove the header from the tree.
    Also, extracts some useful elements from the header element.

    XMLFileOrFilepath is passed straight to ElementTree().parse() and is
    remembered in self.__XMLFileOrFilepath.

    On return, self._XMLTree holds the parsed root element. If the root has
    the expected tag and its first child is the header, that header is
    detached and saved as self.XMLheader, and the version/date/title texts
    of its work element are saved as self.PROGRAM_VERSION, self.dateString
    and self.titleString. Structural problems are logged, not raised.

    Raises AssertionError if no file is given, if a tree is already loaded,
    or if the parsed root has no subelements.
    """
    assert XMLFileOrFilepath
    self.__XMLFileOrFilepath = XMLFileOrFilepath
    assert self._XMLTree is None or len(self._XMLTree) == 0  # Make sure we're not doing this twice

    vPrint('Info', debuggingThisModule,
           _("Loading BibleReferencesLinks XML file from {!r}…").format(self.__XMLFileOrFilepath))
    self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath)
    # Explicit test: relying on the truth value of an Element is deprecated
    assert self._XMLTree is not None and len(self._XMLTree) > 0  # Fail here if we didn't load anything at all

    if self._XMLTree.tag != self._treeTag:
        logging.error(_("Expected to load {!r} but got {!r}").format(self._treeTag, self._XMLTree.tag))
        return

    header = self._XMLTree[0]
    if header.tag == self._headerTag:
        self.XMLheader = header
        self._XMLTree.remove(header)
        BibleOrgSysGlobals.checkXMLNoText(header, 'header')
        BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
        BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
        if len(header) > 1:
            logging.info(_("Unexpected elements in header"))
        elif len(header) == 0:
            logging.info(_("Missing work element in header"))
        else:
            work = header[0]
            BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
            BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
            BibleOrgSysGlobals.checkXMLNoAttributes(work, "work in header")
            if work.tag == "work":
                self.PROGRAM_VERSION = work.find('version').text
                self.dateString = work.find('date').text
                self.titleString = work.find('title').text
            else:
                logging.warning(_("Missing work element in header"))
    else:
        # Translate the template first and fill it in afterwards, so the
        #   message id looked up is the constant string (not the formatted one)
        logging.warning(_("Missing header element (looking for {!r} tag)").format(self._headerTag))
    # This is checked for whatever the first element was (header or not)
    if header.tail is not None and header.tail.strip():
        logging.error(_("Unexpected {!r} tail data after header").format(header.tail))
def validateEntries(self, segment) -> None:
    """
    Validate the entries segment of a Strongs lexicon and process its entries.

    The segment element itself is checked for text, tail and attributes.
    self.StrongsEntries is then reset to an empty dict and every direct
    child tagged 'entry' is handed to self.validateEntry(). Children with
    any other tag are passed over without comment.
    """
    segmentTag = segment.tag
    if BibleOrgSysGlobals.debugFlag:
        assert segmentTag == 'entries'
    BibleOrgSysGlobals.checkXMLNoText(segment, segmentTag, "kw99")
    BibleOrgSysGlobals.checkXMLNoTail(segment, segmentTag, "ls90")
    BibleOrgSysGlobals.checkXMLNoAttributes(segment, segmentTag, "hsj2")

    self.StrongsEntries = {}  # Start afresh before the entries get validated
    for entryElement in filter(lambda child: child.tag == 'entry', segment):
        self.validateEntry(entryElement)
def load(self):
    """
    Load a single source XML file and load book elements.

    Parses self.sourceFilepath into self.XMLTree, checks the main container
    element, and passes each book element to self.__validateAndExtractBook().
    'OT' and 'NT' elements directly under the main element are accepted but
    not processed; any other element is logged as an error. The "n" and
    "sn" attributes of the main element are accepted; others are logged.

    self.doPostLoadProcessing() is always called at the end, even if the
    main container did not have the expected tag (which is logged).
    """
    vPrint('Info', debuggingThisModule, _("Loading {}…").format(self.sourceFilepath))
    self.XMLTree = ElementTree().parse(self.sourceFilepath)
    if BibleOrgSysGlobals.debugFlag:
        # Explicit test: relying on the truth value of an Element is deprecated
        assert self.XMLTree is not None and len(self.XMLTree) > 0  # Fail here if we didn't load anything at all

    # Find the main (bible) container
    if self.XMLTree.tag == OpenSongXMLBible.treeTag:
        location = "XML file"
        BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
        BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

        # The name ("n") and short name ("sn") attributes are recognised but not currently used
        for attrib, value in self.XMLTree.items():
            if attrib not in ("n", "sn"):
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in main element".format(attrib, value))

        # Find the submain (book) containers
        for element in self.XMLTree:
            if element.tag == OpenSongXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                self.__validateAndExtractBook(element)
            elif element.tag in ('OT', 'NT'):
                pass  # Accepted, but nothing inside these is processed here
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.bookTag, element.tag))
    else:
        logging.error("Expected to load {!r} but got {!r}".format(
            OpenSongXMLBible.treeTag, self.XMLTree.tag))

    self.doPostLoadProcessing()
def _validate(self):
    """
    Check/validate the loaded data.

    Walks the direct children of self._XMLTree. Each child with the main
    element tag is checked for text, tail and subelements, and then its
    attributes are checked against self._compulsoryAttributes,
    self._optionalAttributes and self._uniqueAttributes. Children with any
    other tag are logged as unexpected.

    All problems are reported through logging -- nothing is returned.
    Raises AssertionError if no tree is loaded or the tree has no subelements.
    """
    # Explicit test: relying on the truth value of an Element is deprecated
    assert self._XMLTree is not None and len(self._XMLTree) > 0

    # Values already seen for each must-be-unique attribute
    #   (sets, so that each repeat check is a constant-time lookup)
    seenValues = {attributeName: set() for attributeName in self._uniqueAttributes}

    for j, element in enumerate(self._XMLTree):
        if element.tag != self._mainElementTag:
            logging.warning("Unexpected element: {} in record {}".format(element.tag, j))
            continue

        BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self._compulsoryAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    "Compulsory {!r} attribute is missing from {} element in record {}".format(
                        attributeName, element.tag, j))
            # NOTE: A missing attribute is also reported as blank here; "type" is allowed to be blank
            if not attributeValue and attributeName != "type":
                logging.warning(
                    "Compulsory {!r} attribute is blank on {} element in record {}".format(
                        attributeName, element.tag, j))

        # Check optional attributes on this main element
        for attributeName in self._optionalAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    "Optional {!r} attribute is blank on {} element in record {}".format(
                        attributeName, element.tag, j))

        # Check for unexpected additional attributes on this main element
        for attributeName, attributeValue in element.items():
            if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                logging.warning(
                    "Additional {!r} attribute ({!r}) found on {} element in record {}".format(
                        attributeName, attributeValue, element.tag, j))

        # Check the attributes that must contain unique information
        #   (in that particular field -- doesn't check across different attributes)
        for attributeName in self._uniqueAttributes:
            attributeValue = element.get(attributeName)
            # "reference_name" is exempted from the uniqueness check
            if attributeValue is not None and attributeName != "reference_name":
                if attributeValue in seenValues[attributeName]:
                    logging.error(
                        "Found {!r} data repeated in {!r} field on {} element in record {}".format(
                            attributeValue, attributeName, element.tag, j))
                seenValues[attributeName].add(attributeValue)
def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook, verse):
    """
    Check/validate and extract verse data from the given XML verse element
    and save the verse text into thisBook.

    BBB and chapterNumber are only used here to describe the location in
        messages.
    thisBook receives the verse through thisBook.addLine( 'v', ... ) as
        the verse number, a space, and then the verse text.
    verse is the XML verse element: its "n" attribute supplies the verse
        number (other attributes are logged as unprocessed) and its text
        supplies the verse text.

    The text is stripped and any embedded newlines are replaced by spaces.
    Nothing is saved for a verse that has no text left after stripping.
    A verse with text but no "n" attribute is logged as an error and skipped.

    In debug mode, raises AssertionError if the verse number is missing or blank.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
        vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

    location = "verse in {} {}".format(BBB, chapterNumber)
    # Verses are checked for having no subelements, so only the element's own text is extracted below
    BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
    BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

    # Handle verse attributes
    verseNumber = None
    for attrib, value in verse.items():
        if attrib == "n":
            verseNumber = value
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in verse element".format(attrib, value))
    if BibleOrgSysGlobals.debugFlag:
        assert verseNumber

    vText = '' if verse.text is None else verse.text.strip()
    if not vText:
        return  # Nothing to save for this verse

    # This is the main text of the verse
    if verseNumber is None:
        # Without a number the line can't be built -- report it instead of failing on None + str
        logging.error("Missing 'n' attribute on {} so verse text {!r} was ignored".format(location, vText))
        return
    if '\n' in vText:
        vPrint('Quiet', debuggingThisModule,
               "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
               .format(BBB, chapterNumber, verseNumber, vText))
        vText = vText.replace('\n', ' ')
    thisBook.addLine('v', verseNumber + ' ' + vText)
def load(self):
    """
    Load a single source XML file and load book elements.

    Parses self.sourceFilepath into self.XMLTree and then walks the direct
    children of the main element:
        revision, title, font and copyright elements have their text saved
            in self.suppliedMetadata['VerseView'] under 'Revision', 'Title',
            'Font' and 'Copyright';
        filename and sizefactor elements are checked but not saved;
        book elements are numbered from 1 in file order and passed to
            self.__validateAndExtractBook();
        anything else is logged as an error.

    Finishes by calling self.applySuppliedMetadata( 'VerseView' ) and
    self.doPostLoadProcessing(), even if the main element did not have the
    expected tag (which is logged).
    """
    vPrint('Info', debuggingThisModule, _("Loading {}…").format(self.sourceFilepath))
    self.XMLTree = ElementTree().parse(self.sourceFilepath)
    if BibleOrgSysGlobals.debugFlag:
        # Explicit test: relying on the truth value of an Element is deprecated
        assert self.XMLTree is not None and len(self.XMLTree) > 0  # Fail here if we didn't load anything at all

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    metadata = self.suppliedMetadata['VerseView'] = {}

    location = "VerseView XML file"

    def checkInfoElement(element, description):
        """ Run the checks shared by all the simple (text-only) info elements. """
        sublocation = description + " in " + location
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
        BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
        BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
    # end of checkInfoElement

    # Find the main (bible) container
    if self.XMLTree.tag == VerseViewXMLBible.treeTag:
        BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
        BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
        BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

        # Find the submain (various info and then book) containers
        bookNumber = 0
        for element in self.XMLTree:
            if element.tag == VerseViewXMLBible.filenameTag:
                checkInfoElement(element, 'filename')  # The filename itself isn't saved
            elif element.tag == VerseViewXMLBible.revisionTag:
                checkInfoElement(element, 'revision')
                metadata['Revision'] = element.text
            elif element.tag == VerseViewXMLBible.titleTag:
                checkInfoElement(element, 'title')
                metadata['Title'] = element.text
            elif element.tag == VerseViewXMLBible.fontTag:
                checkInfoElement(element, 'font')
                metadata['Font'] = element.text
            elif element.tag == VerseViewXMLBible.copyrightTag:
                checkInfoElement(element, 'copyright')
                metadata['Copyright'] = element.text
            elif element.tag == VerseViewXMLBible.sizefactorTag:
                checkInfoElement(element, 'sizefactor')
                if BibleOrgSysGlobals.debugFlag:
                    assert element.text == '1'
            elif element.tag == VerseViewXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                bookNumber += 1
                self.__validateAndExtractBook(element, bookNumber)
            else:
                logging.error("xk15 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.bookTag, element.tag))
    else:
        logging.error("Expected to load {!r} but got {!r}".format(
            VerseViewXMLBible.treeTag, self.XMLTree.tag))

    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        # These are all expected to exist, but .get() is used so that a file
        #   lacking one (or having the wrong main element) shows None here
        #   rather than failing with a KeyError only when running verbosely
        vPrint('Quiet', debuggingThisModule, "Revision is {!r}".format(metadata.get('Revision')))
        vPrint('Quiet', debuggingThisModule, "Title is {!r}".format(metadata.get('Title')))
        vPrint('Quiet', debuggingThisModule, "Font is {!r}".format(metadata.get('Font')))
        vPrint('Quiet', debuggingThisModule, "Copyright is {!r}".format(metadata.get('Copyright')))

    self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def importDataToPython(self):
    """
    Loads (and pivots) the data (not including the header) into suitable
    Python containers to use in a Python program.
    (Of course, you can just use the elementTree in self._XMLTree if you prefer.)

    Returns a 2-tuple ( dataList, dataDict ), which is also cached in
    self.__DataList and self.__DataDict so that later calls return the
    same objects without redoing the work.

    dataList has one entry per record in the XML tree:
        ( sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
    where each entry in actualLinksList is
        ( targetReference, targetComponent, parsedTargetReference, linkType )
    and parsedTargetReference is None if FlexibleVersesKey rejected the
    target reference with a TypeError (which is logged).

    dataDict maps each verse key to a list of entries. Every verse included
    in a source reference gets that record's dataList entry. Every verse
    included in a (successfully parsed) target reference gets a reversed entry
        ( targetReference, targetComponent, parsedTargetReference,
            [ ( sourceReference, sourceComponent, parsedSourceReference, reverseLinkType ) ] )

    Raises AssertionError if no tree is loaded or a record contains an
    unexpected component or link type, TypeError if a reference doesn't
    match its declared component, and ValueError for a link type with no
    known reverse.
    """
    def checkReferenceComponent(component, reference):
        """
        Sanity check that the reference string agrees with its declared component.

        A 'Verse' must be accepted by SimpleVerseKey. Anything else must be
        rejected by it (with a TypeError), or else the mismatch is logged and
        TypeError is raised.
        """
        if component == 'Verse':
            SimpleVerseKey(reference)
            return
        try:
            SimpleVerseKey(reference, ignoreParseErrors=True)
        except TypeError:
            return  # This should happen coz it should fail the SVK
        logging.error("{} {!r} failed!".format(component, reference))
        raise TypeError
    # end of checkReferenceComponent

    # Explicit test: relying on the truth value of an Element is deprecated
    assert self._XMLTree is not None and len(self._XMLTree) > 0
    if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
        return self.__DataList, self.__DataDict

    allowedComponents = ('Section', 'Verses', 'Verse')
    reverseLinkTypes = {
        'TSK': 'TSKQuoted',
        'QuotedOTReference': 'OTReferenceQuoted',
        'AlludedOTReference': 'OTReferenceAlluded',
        'PossibleOTReference': 'OTReferencePossible',
    }

    # First pass: pull the raw strings out of the XML
    rawRefLinkList = []
    actualLinkCount = 0
    for element in self._XMLTree:
        # Get these first for helpful error messages
        sourceReference = element.find('sourceReference').text
        sourceComponent = element.find('sourceComponent').text
        assert sourceComponent in allowedComponents
        BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference, 'kd21')
        BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

        actualRawLinksList = []
        for subelement in element:
            if subelement.tag in ('sourceReference', 'sourceComponent'):  # already processed these
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'ls12')
                BibleOrgSysGlobals.checkXMLNoSubelements(subelement, sourceReference, 'ks02')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'sqw1')
            elif subelement.tag == 'BibleReferenceLink':
                BibleOrgSysGlobals.checkXMLNoText(subelement, sourceReference, 'haw9')
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'hs19')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'jsd9')
                targetReference = subelement.find('targetReference').text
                targetComponent = subelement.find('targetComponent').text
                assert targetComponent in allowedComponents
                linkType = subelement.find('linkType').text
                assert linkType in reverseLinkTypes
                actualRawLinksList.append((targetReference, targetComponent, linkType))
                actualLinkCount += 1
            # NOTE: Subelements with any other tag are passed over without comment

        rawRefLinkList.append((sourceReference, sourceComponent, actualRawLinksList))

    vPrint('Normal', debuggingThisModule,
           f" {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)")

    # Second pass: check and parse the reference strings
    myRefLinkList = []
    actualLinkCount = 0
    # NOTE(review): The result of this call is never used below. The call is kept in case
    #   the constructor has side-effects that the key classes depend on -- TODO confirm
    BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    for j, (sourceReference, sourceComponent, actualRawLinksList) in enumerate(rawRefLinkList):
        checkReferenceComponent(sourceComponent, sourceReference)  # Just do some testing first
        parsedSourceReference = FlexibleVersesKey(sourceReference)  # Now do the actual parsing
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            vPrint('Quiet', debuggingThisModule, j, sourceComponent, sourceReference, parsedSourceReference)

        actualLinksList = []
        for targetReference, targetComponent, linkType in actualRawLinksList:
            checkReferenceComponent(targetComponent, targetReference)  # Just do some testing first
            # Now do the actual parsing (an unparsable target is kept, but with None for the parsed key)
            try:
                parsedTargetReference = FlexibleVersesKey(targetReference)
            except TypeError:
                logging.error(
                    " Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                    .format(targetReference))
                parsedTargetReference = None
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, ' ', targetComponent, targetReference, parsedTargetReference)

            actualLinksList.append((targetReference, targetComponent, parsedTargetReference, linkType))
            actualLinkCount += 1

        myRefLinkList.append((sourceReference, sourceComponent, parsedSourceReference, actualLinksList))

    vPrint('Normal', debuggingThisModule,
           " {:,} links processed (with {:,} actual link entries)".format(
               len(rawRefLinkList), actualLinkCount))
    self.__DataList = myRefLinkList

    # Now put it into my dictionaries for easy access
    # This part should be customized or added to for however you need to process the data

    # Create a link dictionary (by verse key)
    myRefLinkDict = {}
    for entry in myRefLinkList:
        parsedSourceReference = entry[2]
        for verseRef in parsedSourceReference.getIncludedVerses():
            assert isinstance(verseRef, SimpleVerseKey)
            myRefLinkDict.setdefault(verseRef, []).append(entry)
    originalLinks = len(myRefLinkDict)
    vPrint('Quiet', debuggingThisModule,
           " {:,} verse links added to dictionary (includes filling out spans)"
           .format(originalLinks))

    # Add the reversed links to the same dictionary (by verse key)
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
        for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
            if parsedTargetReference is None:
                continue  # Couldn't be parsed above
            try:
                reverseLinkType = reverseLinkTypes[linkType]
            except KeyError:  # Have a new linkType!
                raise ValueError("Unknown linkType {!r} for {!r}".format(linkType, targetReference)) from None
            reversedEntry = (targetReference, targetComponent, parsedTargetReference,
                             [(sourceReference, sourceComponent, parsedSourceReference, reverseLinkType)])
            for verseRef in parsedTargetReference.getIncludedVerses():
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append(reversedEntry)
    totalLinks = len(myRefLinkDict)
    reverseLinks = totalLinks - originalLinks
    vPrint('Quiet', debuggingThisModule,
           " {:,} reverse links added to dictionary to give {:,} total".format(reverseLinks, totalLinks))
    self.__DataDict = myRefLinkDict

    # Let's find the most number of references for a verse
    totalReferences = sum(len(entryList) for entryList in myRefLinkDict.values())
    if myRefLinkDict:  # There's no "maximum" to report if nothing ended up in the dictionary
        # max() gives the first of any equal maxima (in dictionary order)
        mostVerseRef, mostEntryList = max(myRefLinkDict.items(), key=lambda item: len(item[1]))
        vPrint('Quiet', debuggingThisModule,
               " {:,} maximum links for any one reference ({})".format(
                   len(mostEntryList), mostVerseRef.getShortText()))
    vPrint('Quiet', debuggingThisModule,
           " {:,} total links for all references".format(totalReferences))

    return self.__DataList, self.__DataDict
def __validate(self):
    """
    Check/validate the loaded data.

    Walks the direct children of self._XMLTree. Each child with the main
    element tag has its attributes checked against
    self._compulsoryAttributes, self._optionalAttributes and
    self._uniqueAttributes, and its subelements checked against
    self._compulsoryElements, self._optionalElements and
    self._uniqueElements. Children with any other tag are logged as
    unexpected. Tail text after any child, or after the tree itself, is
    logged as an error.

    Records are identified in messages by the text of their
    sourceComponent subelement (None if there isn't one) and by their
    zero-based position.

    All problems are reported through logging -- nothing is returned.
    Raises AssertionError if no tree is loaded or the tree has no subelements.
    """
    # Explicit test: relying on the truth value of an Element is deprecated
    assert self._XMLTree is not None and len(self._XMLTree) > 0

    # Values already seen for each must-be-unique element/attribute
    #   (sets, so that each repeat check is a constant-time lookup)
    seenElementTexts = {elementName: set() for elementName in self._uniqueElements}
    seenAttributeValues = {attributeName: set() for attributeName in self._uniqueAttributes}

    for j, element in enumerate(self._XMLTree):
        if element.tag == self._mainElementTag:
            BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self._compulsoryAttributes and not self._optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self._compulsoryElements and not self._optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}")
                        .format(attributeName, element.tag, j))
                # NOTE: A missing attribute is also reported as blank here
                if not attributeValue:
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}")
                        .format(attributeName, element.tag, j))

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}")
                        .format(attributeName, element.tag, j))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}")
                        .format(attributeName, attributeValue, element.tag, j))

            # Check the attributes that must contain unique information
            #   (in that particular field -- doesn't check across different attributes)
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if attributeValue in seenAttributeValues[attributeName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}")
                            .format(attributeValue, attributeName, element.tag, j))
                    seenAttributeValues[attributeName].add(attributeValue)

            # Get the sourceComponent to use as a record ID
            #   (tolerating its absence, so that the checks below still get to run and report)
            IDElement = element.find("sourceComponent")
            ID = None if IDElement is None else IDElement.text

            # Check compulsory elements
            for elementName in self._compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})")
                        .format(elementName, ID, j))
                else:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail(foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(foundElement, sublocation)
                    # NOTE: Compulsory elements are not checked for having no subelements
                    if not foundElement.text:
                        logging.warning(
                            _("Compulsory {!r} element is blank in record with ID {!r} (record {})")
                            .format(elementName, ID, j))

            # Check optional elements
            for elementName in self._optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail(foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoSubelements(foundElement, sublocation)
                    if not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with ID {!r} (record {})")
                            .format(elementName, ID, j))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})")
                        .format(subelement.tag, subelement.text, ID, j))

            # Check the elements that must contain unique information
            #   (in that particular element -- doesn't check across different elements)
            for elementName in self._uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    if text in seenElementTexts[elementName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})")
                            .format(text, elementName, ID, j))
                    seenElementTexts[elementName].add(text)
        else:
            logging.warning(_("Unexpected element: {} in record {}").format(element.tag, j))

        # This is checked for every element (expected or not)
        if element.tail is not None and element.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element in record {}")
                .format(element.tail, element.tag, j))

    if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
        logging.error(
            _("Unexpected {!r} tail data after {} element").format(
                self._XMLTree.tail, self._XMLTree.tag))
def __validateSystem(self, bookOrderTree, systemName):
    """
    Do a semi-automatic check of the XML file validity.

    Every child of bookOrderTree is checked against the element and
    attribute specifications held on self (mainElementTag and the
    compulsory*/optional*/unique* collections). Problems are reported
    through the logging module; nothing is returned.

    Parameters:
        bookOrderTree: the parsed XML tree of one book order system
            (asserted to be truthy).
        systemName: name of the system, used in some of the log messages.
    """
    assert bookOrderTree

    # Per-field lists of the values seen so far (for the uniqueness checks)
    uniqueDict = {}
    for elementName in self.uniqueElements:
        uniqueDict["Element_" + elementName] = []
    for attributeName in self.uniqueAttributes:
        uniqueDict["Attribute_" + attributeName] = []
    # Element texts are tracked separately so that a text value can never
    # clash with (and overwrite) one of the "Element_…"/"Attribute_…" lists
    seenTexts = set()

    expectedID = 1
    for k, element in enumerate(bookOrderTree):
        if element.tag != self.mainElementTag:
            logging.warning(
                _("Unexpected element: {} in record {}").format(element.tag, k))
            continue

        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        if not self.compulsoryAttributes and not self.optionalAttributes:
            BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
        if not self.compulsoryElements and not self.optionalElements:
            BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        # Check ascending ID field
        ID = element.get("id")
        try:
            intID = int(ID)
        except (TypeError, ValueError):
            # A validator should report a bad id, not crash on it
            logging.error(
                _("Missing or non-numeric ID {!r} in record {} for {}").format(
                    ID, k, systemName))
        else:
            if intID != expectedID:
                logging.error(
                    _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}"
                      ).format(k, intID, expectedID, systemName))
        expectedID += 1

        # Check that this is unique
        if element.text:
            if element.text in seenTexts:
                logging.error(
                    _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {}) for {}"
                      ).format(element.text, element.tag, ID, k, systemName))
            seenTexts.add(element.text)

        # Check compulsory attributes on this main element
        for attributeName in self.compulsoryAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    _("Compulsory {!r} attribute is missing from {} element in record {}"
                      ).format(attributeName, element.tag, k))
            elif not attributeValue:  # present but empty
                logging.warning(
                    _("Compulsory {!r} attribute is blank on {} element in record {}"
                      ).format(attributeName, element.tag, k))

        # Check optional attributes on this main element
        for attributeName in self.optionalAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    _("Optional {!r} attribute is blank on {} element in record {}"
                      ).format(attributeName, element.tag, k))

        # Check for unexpected additional attributes on this main element
        for attributeName in element.keys():
            attributeValue = element.get(attributeName)
            if (attributeName not in self.compulsoryAttributes
                    and attributeName not in self.optionalAttributes):
                logging.warning(
                    _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                      ).format(attributeName, attributeValue, element.tag, k))

        # Check the attributes that must contain unique information
        # (in that particular field -- doesn't check across different attributes)
        for attributeName in self.uniqueAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None:
                seenValues = uniqueDict["Attribute_" + attributeName]
                if attributeValue in seenValues:
                    logging.error(
                        _("Found {!r} data repeated in {!r} field on {} element in record {}"
                          ).format(attributeValue, attributeName, element.tag, k))
                seenValues.append(attributeValue)

        # Check compulsory elements
        for elementName in self.compulsoryElements:
            foundElement = element.find(elementName)
            if foundElement is None:
                logging.error(
                    _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                      ).format(elementName, ID, k))
            elif not foundElement.text:  # only look at .text when the element exists
                logging.warning(
                    _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                      ).format(elementName, ID, k))

        # Check optional elements
        for elementName in self.optionalElements:
            foundElement = element.find(elementName)
            if foundElement is not None and not foundElement.text:
                logging.warning(
                    _("Optional {!r} element is blank in record with ID {!r} (record {})"
                      ).format(elementName, ID, k))

        # Check for unexpected additional elements
        for subelement in element:
            if (subelement.tag not in self.compulsoryElements
                    and subelement.tag not in self.optionalElements):
                logging.warning(
                    _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                      ).format(subelement.tag, subelement.text, ID, k))

        # Check the elements that must contain unique information
        # (in that particular element -- doesn't check across different elements)
        for elementName in self.uniqueElements:
            foundElement = element.find(elementName)
            if foundElement is not None:
                text = foundElement.text
                seenValues = uniqueDict["Element_" + elementName]
                if text in seenValues:
                    logging.error(
                        _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                          ).format(text, elementName, ID, k))
                seenValues.append(text)
def __validateSystem( self, systemName ):
    """
    Checks for basic formatting/content errors in a Bible book name system.

    The tree stored under self.__XMLSystems[systemName]['tree'] is walked and
    every child whose tag is in self.mainElementTags is checked against the
    compulsory*/optional*/unique* specifications for that tag (these are
    indexed by the tag's position in self.mainElementTags). Problems are
    reported through the logging module; nothing is returned.

    Parameters:
        systemName: key of the already-loaded system (asserted to be truthy).
    """
    assert systemName
    systemData = self.__XMLSystems[systemName]
    assert systemData['tree']

    if len(systemData["languageCode"]) != 3:
        logging.error( _("Couldn't find 3-letter language code in {!r} book names system").format( systemName ) )
    #if self.__ISOLanguages and not self.__ISOLanguages.isValidLanguageCode( self.__XMLSystems[systemName]["languageCode"] ): # Check that we have a valid language code
        #logging.error( _("Unrecognized {!r} ISO-639-3 language code in {!r} book names system").format( self.__XMLSystems[systemName]["languageCode"], systemName ) )

    # Per-tag, per-field lists of the values seen so far (for the uniqueness checks)
    uniqueDict = {}
    for index in range( len(self.mainElementTags) ):
        for elementName in self.uniqueElements[index]:
            uniqueDict["Element_"+str(index)+"_"+elementName] = []
        for attributeName in self.uniqueAttributes[index]:
            uniqueDict["Attribute_"+str(index)+"_"+attributeName] = []

    for k,element in enumerate(systemData['tree']):
        if element.tag not in self.mainElementTags:
            logging.warning( _("Unexpected element: {} in record {} in {}").format( element.tag, k, systemName ) )
            continue

        BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
        BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
        # NOTE(review): these two tests look at the outer (per-tag) collections,
        #   not at the entry for this element's tag -- confirm that is intended
        if not self.compulsoryAttributes and not self.optionalAttributes:
            BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
        if not self.compulsoryElements and not self.optionalElements:
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

        index = self.mainElementTags.index( element.tag )
        compulsoryAttributes = self.compulsoryAttributes[index]
        optionalAttributes = self.optionalAttributes[index]
        compulsoryElements = self.compulsoryElements[index]
        optionalElements = self.optionalElements[index]

        # Check compulsory attributes on this main element
        for attributeName in compulsoryAttributes:
            attributeValue = element.get( attributeName )
            if attributeValue is None:
                logging.error( _("Compulsory {!r} attribute is missing from {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )
            elif not attributeValue: # present but empty
                logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

        # Check optional attributes on this main element
        for attributeName in optionalAttributes:
            attributeValue = element.get( attributeName )
            if attributeValue is not None and not attributeValue:
                logging.warning( _("Optional {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

        # Check for unexpected additional attributes on this main element
        for attributeName in element.keys():
            attributeValue = element.get( attributeName )
            if attributeName not in compulsoryAttributes and attributeName not in optionalAttributes:
                logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}").format( attributeName, attributeValue, element.tag, k, systemName ) )

        # Check the attributes that must contain unique information
        #   (in that particular field -- doesn't check across different attributes)
        for attributeName in self.uniqueAttributes[index]:
            attributeValue = element.get( attributeName )
            if attributeValue is not None:
                seenValues = uniqueDict["Attribute_"+str(index)+"_"+attributeName]
                if attributeValue in seenValues:
                    logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {} in {}").format( attributeValue, attributeName, element.tag, k, systemName ) )
                seenValues.append( attributeValue )

        # Check compulsory elements
        for elementName in compulsoryElements:
            foundElement = element.find( elementName )
            if foundElement is None:
                logging.error( _("Compulsory {!r} element is missing (record {}) in {}").format( elementName, k, systemName ) )
            elif not foundElement.text: # only look at .text when the element exists
                logging.warning( _("Compulsory {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

        # Check optional elements
        for elementName in optionalElements:
            foundElement = element.find( elementName )
            if foundElement is not None and not foundElement.text:
                logging.warning( _("Optional {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

        # Check for unexpected additional elements
        for subelement in element:
            if subelement.tag not in compulsoryElements and subelement.tag not in optionalElements:
                logging.warning( _("Additional {!r} element ({!r}) found (record {}) in {} {}").format( subelement.tag, subelement.text, k, systemName, element.tag ) )

        # Check the elements that must contain unique information
        #   (in that particular element -- doesn't check across different elements)
        for elementName in self.uniqueElements[index]:
            foundElement = element.find( elementName )
            if foundElement is not None:
                text = foundElement.text
                seenValues = uniqueDict["Element_"+str(index)+"_"+elementName]
                if text in seenValues:
                    # Repeats inside BibleDivisionNames are only reported at info level
                    myLogging = logging.info if element.tag == 'BibleDivisionNames' else logging.error
                    myLogging( _("Found {!r} data repeated in {!r} element (record {}) in {}").format( text, elementName, k, systemName ) )
                seenValues.append( text )
def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
    """
    Check/validate and extract chapter data from the given XML book record
        finding and saving chapter numbers and
        finding and saving verse elements.

    Parameters:
        BBB: book code, used in log messages and sub-locations.
        thisBook: object receiving the extracted lines via addLine(marker, text).
        chapter: the XML chapter element; its 'n' attribute gives the chapter
            number and its children are expected to be verse elements.

    A 'c' line is added for the chapter, then a 'v' line per verse. Verse text
    containing newlines is emitted as 'q1' lines (one per text line after the
    first). Problems are reported through logging; nothing is returned.
    """
    vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

    # Process the chapter attributes first
    chapterNumber = None
    for attrib, value in chapter.items():
        if attrib == "n":
            chapterNumber = value
        elif attrib == "VERSES":
            pass  # Recognised, but the value is not used
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in chapter element".format(
                    attrib, value))
    if chapterNumber:
        chapterNumber = chapterNumber.replace(
            'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
        thisBook.addLine('c', chapterNumber)
    else:
        logging.error(
            "Missing 'n' attribute in chapter element for {}".format(BBB))

    for element in chapter:
        if element.tag != OpenSongXMLBible.verseTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.verseTag, element.tag))
            continue

        sublocation = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')

        verseNumber = None
        for attrib, value in element.items():
            if attrib == "n":
                verseNumber = value
            elif attrib == "t":
                pass  # Recognised, but the value is not used
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag:
            assert verseNumber

        # Assemble the verse text from the element text, any italic
        #   subelements (with their tails), and the element tail
        vText = element.text if element.text else ''
        for subelement in element:
            sub2location = "{} in {}".format(subelement.tag, sublocation)
            BibleOrgSysGlobals.checkXMLNoAttributes(
                subelement, sub2location, 'ks03')
            BibleOrgSysGlobals.checkXMLNoSubelements(
                subelement, sub2location, 'ks05')
            if subelement.tag == 'i':
                # .text/.tail are None when absent -- don't let the literal
                #   string 'None' leak into the verse text
                vText += '\\it {}\\it*{}'.format(
                    subelement.text or '', subelement.tail or '')
            else:
                logging.error(
                    "Expected to find 'i' but got {!r}".format(subelement.tag))
        vText += element.tail if element.tail else ''

        if not vText:
            logging.warning("{} {}:{} has no text".format(
                BBB, chapterNumber, verseNumber))
            continue

        if verseNumber is None:
            # Without a number the 'v' line can't be built (the string
            #   concatenation below would raise TypeError)
            logging.error(
                "Missing 'n' attribute in verse element for {} {}".format(
                    BBB, chapterNumber))
            continue

        # This is the main text of the verse (follows the verse milestone)
        if '\n' in vText:  # This is how they represent poetry
            for j, textBit in enumerate(vText.split('\n')):
                if j == 0:
                    thisBook.addLine('q1', '')
                    thisBook.addLine('v', verseNumber + ' ' + textBit)
                else:
                    thisBook.addLine('q1', textBit)
        else:  # Just one verse line
            thisBook.addLine('v', verseNumber + ' ' + vText)
def __validateAndExtractBook(self, book):
    """
    Check/validate and extract book data from the given XML book record
        finding chapter subelements.

    The book name is taken from the 'n' attribute and converted to a BBB
    code, first through self.genericBOS and, failing that, through the
    module-level BibleBooksNames (loaded on first need). A BibleBook is then
    created, given 'id', 'h' and 'mt1' lines, filled chapter by chapter via
    __validateAndExtractChapter, and handed to self.stashBook.
    A missing or unrecognised book name is logged as an error and the book
    is not stashed.
    """
    global BibleBooksNames

    vPrint('Verbose', debuggingThisModule, _("Validating OpenSong XML book…"))

    # Process the book attributes first
    bookName = None
    for attributeName, attributeValue in book.items():
        if attributeName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attributeName, attributeValue))
            continue
        bookName = attributeValue

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    BBB = self.genericBOS.getBBBFromText(bookName)  # Booknames are usually in English
    if not BBB:  # wasn't English
        if BibleBooksNames is None:
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        BBB = BibleBooksNames.getBBBFromText(bookName)  # Try non-English booknames

    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: {!r}").format(bookName))
        return

    vPrint('Info', debuggingThisModule,
           _("Validating {} {}…").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'OpenSong XML Bible Book object'
    thisBook.objectTypeString = 'OpenSong'

    USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(BBB)
    if not USFMAbbreviation:
        logging.critical(f"Unable to find USFM abbreviation for '{BBB}'")
        if BibleOrgSysGlobals.strictCheckingFlag:
            # NOTE(review): `halt` is not defined in this block; evaluating it
            #   presumably raises NameError to abort in strict mode -- confirm
            halt
        USFMAbbreviation = 'XXA'

    idLine = '{} imported by {}'.format(USFMAbbreviation.upper(), programNameVersion)
    thisBook.addLine('id', idLine)
    thisBook.addLine('h', bookName)
    thisBook.addLine('mt1', bookName)

    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.chapterTag, child.tag))
            continue
        sublocation = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'al1d')
        self.__validateAndExtractChapter(BBB, thisBook, child)

    vPrint('Info', debuggingThisModule, " Saving {} into results…".format(BBB))
    self.stashBook(thisBook)
def validateEntry(self, entry) -> None:
    """
    Check/validate the given Strongs Greek lexicon entry.

    Adds good entries to self.StrongsEntries.

    Parameters:
        entry: the XML 'entry' element; its 'strongs' attribute holds the
            five-digit number (checked by assertion in debug mode).

    The stored value is a dict which may contain:
        'word':          (greek, translit, beta) from the first greek element
        'pronunciation': from the pronunciation element seen while still
                         collecting the essentials
        'see':           list of 'G…'/'H…' cross-references
        'Entry':         the remaining text/markup flattened into one string
    It is stored under the text of the 'strongs' subelement.
    Unexpected attributes/elements are reported through logging.
    """
    if BibleOrgSysGlobals.debugFlag:
        assert entry.tag == 'entry'

    BibleOrgSysGlobals.checkXMLNoText(entry, entry.tag, "na19")
    BibleOrgSysGlobals.checkXMLNoTail(entry, entry.tag, "kaq9")

    # Process the entry attributes first
    strongs5 = None
    for attrib, value in entry.items():
        if attrib == 'strongs':
            strongs5 = value
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in main entry element".format(
                    attrib, value))
    if BibleOrgSysGlobals.debugFlag:
        assert len(strongs5) == 5 and strongs5.isdigit()

    entryResults = {}
    entryString = ""
    gettingEssentials = True
    strongs = None  # Set from the 'strongs' subelement; used as the storage key
    for j, element in enumerate(entry):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag:
                assert gettingEssentials and j == 0 and element.text
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, element.tag, "md3d")
            # Entry 02717 and entries 3203..3302 are exempt from the tail check.
            #   (Was written as `3203 > n > 3302`, which can never be true,
            #   so the check was silently skipped for every entry.)
            if strongs5 != '02717' and not (3203 <= int(strongs5) <= 3302):
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag, "f3g7")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, element.tag, "m56g")
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag:
                assert strongs5.endswith(strongs)
            if element.tail and element.tail.strip():
                entryString += element.tail.strip()

        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "jke0")
            #BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "df35")
            # Process the attributes
            translit = greek = beta = None
            for attrib, value in element.items():
                if attrib == "translit":
                    translit = value
                elif attrib == "unicode":
                    greek = value
                elif attrib == "BETA":
                    beta = value
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".format(
                            attrib, value, location))
            if BibleOrgSysGlobals.debugFlag:
                assert greek and translit and beta
            if 'word' not in entryResults:  # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j == 1
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
                entryResults['word'] = (greek, translit, beta)
            else:  # A further greek word: it becomes part of the entry text
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5)  #.replace( '\n', '' )

        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "0s20")
            # Process the attributes
            pronunciation = None
            for attrib, value in element.items():
                if attrib == "strongs":
                    pronunciation = value
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".format(
                            attrib, value, location))
            if gettingEssentials:
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                if BibleOrgSysGlobals.debugFlag:
                    assert j == 2
                    assert pronunciation
                    assert 'pronunciation' not in entryResults
                entryResults['pronunciation'] = pronunciation
            else:
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2 and not gettingEssentials
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                #   source -- confirm the tail is only collected in this branch
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip().replace('\n', '')

        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
            derivation = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert derivation and '\t' not in derivation and '\n' not in derivation
            entryString += derivation

        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "jd28")
            definition = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert definition and '\t' not in definition and '\n' not in definition
            entryString += definition

        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            #BibleOrgSysGlobals.checkXMLNoTail( element, location, "8s2s" )
            #BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "dvb2" )
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
            entryString += KJVdefinition

        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "kls2")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "ks24")
            strongsRef = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
            # Raw strings: '\d' in a normal string literal is an invalid escape
            strongsRef = re.sub(r'<language="GREEK" strongs="(\d{1,5})">',
                                r'<StrongsRef>G\1</StrongsRef>', strongsRef)
            strongsRef = re.sub(r'<strongs="(\d{1,5})" language="GREEK">',
                                r'<StrongsRef>G\1</StrongsRef>', strongsRef)
            #strongsRef = re.sub( r'<language="HEBREW" strongs="(\d{1,5})">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
            #strongsRef = re.sub( r'<strongs="(\d{1,5})" language="HEBREW">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
            entryString += ' ' + strongsRef

        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "kd02")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "0s20")
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib, value in element.items():
                if attrib == "language":
                    seeLanguage = value
                elif attrib == "strongs":
                    seeStrongsNumber = value  # Note: No leading zeroes here
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".format(
                            attrib, value, location))
            if BibleOrgSysGlobals.debugFlag:
                assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                assert seeLanguage in ('GREEK', 'HEBREW',)
            if 'see' not in entryResults:
                entryResults['see'] = []
            entryResults['see'].append(
                ('G' if seeLanguage == 'GREEK' else 'H') + seeStrongsNumber)

        else:
            logging.error(
                "2d4f Unprocessed {!r} element ({}) in entry".format(
                    element.tag, element.text))

    if entryString:
        if BibleOrgSysGlobals.debugFlag:
            assert '\t' not in entryString and '\n' not in entryString
        # Convert the flattened cross-reference markup to <StrongsRef> form
        entryString = re.sub(
            r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
            r'<StrongsRef>G\1</StrongsRef>', entryString)
        entryString = re.sub(
            r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
            r'<StrongsRef>G\1</StrongsRef>', entryString)
        entryString = re.sub(
            r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
            r'<StrongsRef>H\1</StrongsRef>', entryString)
        entryString = re.sub(
            r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
            r'<StrongsRef>H\1</StrongsRef>', entryString)
        if BibleOrgSysGlobals.debugFlag:
            assert 'strongsref' not in entryString
        entryResults['Entry'] = entryString

    if strongs is None:
        # No usable 'strongs' subelement, so there is no key to store under
        logging.error(
            "Missing 'strongs' element in entry {}".format(strongs5))
        return
    # NOTE(review): nesting reconstructed from whitespace-collapsed source --
    #   confirm entries without any entry text are meant to be stored as well
    self.StrongsEntries[strongs] = entryResults
def _validateSystem(self, punctuationTree, systemName):
    """
    Check one loaded punctuation system tree for formatting/content errors.

    Every child of punctuationTree whose tag is in self.mainElementTags is
    checked against the compulsory*/optional*/unique* specifications held on
    self. Problems are reported through the logging module; nothing is
    returned.

    Parameters:
        punctuationTree: the parsed XML tree (asserted to be truthy).
        systemName: name of the system, included in the element-level
            log messages.
    """
    assert punctuationTree

    # Per-field lists of the values seen so far (for the uniqueness checks)
    uniqueDict = {}
    for elementName in self.uniqueElements:
        uniqueDict["Element_" + elementName] = []
    for attributeName in self.uniqueAttributes:
        uniqueDict["Attribute_" + attributeName] = []

    for k, element in enumerate(punctuationTree):
        if element.tag not in self.mainElementTags:
            logging.warning(
                _("Unexpected element: {} in record {}").format(element.tag, k))
            continue

        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        if not self.compulsoryAttributes and not self.optionalAttributes:
            BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
        if not self.compulsoryElements and not self.optionalElements:
            BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self.compulsoryAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    _("Compulsory {!r} attribute is missing from {} element in record {}"
                      ).format(attributeName, element.tag, k))
            elif not attributeValue:  # present but empty
                logging.warning(
                    _("Compulsory {!r} attribute is blank on {} element in record {}"
                      ).format(attributeName, element.tag, k))

        # Check optional attributes on this main element
        for attributeName in self.optionalAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    _("Optional {!r} attribute is blank on {} element in record {}"
                      ).format(attributeName, element.tag, k))

        # Check for unexpected additional attributes on this main element
        for attributeName in element.keys():
            attributeValue = element.get(attributeName)
            if (attributeName not in self.compulsoryAttributes
                    and attributeName not in self.optionalAttributes):
                logging.warning(
                    _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                      ).format(attributeName, attributeValue, element.tag, k))

        # Check the attributes that must contain unique information
        # (in that particular field -- doesn't check across different attributes)
        for attributeName in self.uniqueAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None:
                seenValues = uniqueDict["Attribute_" + attributeName]
                if attributeValue in seenValues:
                    logging.error(
                        _("Found {!r} data repeated in {!r} field on {} element in record {}"
                          ).format(attributeValue, attributeName, element.tag, k))
                seenValues.append(attributeValue)

        # The element-level messages below identify the record by its index
        # and the system name: the records handled here have no ID variable
        # (the previous messages referenced an undefined `ID` name).

        # Check compulsory elements
        for elementName in self.compulsoryElements:
            foundElement = element.find(elementName)
            if foundElement is None:
                logging.error(
                    _("Compulsory {!r} element is missing (record {}) in {}"
                      ).format(elementName, k, systemName))
            elif not foundElement.text:  # only look at .text when the element exists
                logging.warning(
                    _("Compulsory {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check optional elements
        for elementName in self.optionalElements:
            foundElement = element.find(elementName)
            if foundElement is not None and not foundElement.text:
                logging.warning(
                    _("Optional {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check for unexpected additional elements
        for subelement in element:
            if (subelement.tag not in self.compulsoryElements
                    and subelement.tag not in self.optionalElements):
                logging.warning(
                    _("Additional {!r} element ({!r}) found (record {}) in {} {}"
                      ).format(subelement.tag, subelement.text, k, systemName,
                               element.tag))

        # Check the elements that must contain unique information
        # (in that particular element -- doesn't check across different elements)
        for elementName in self.uniqueElements:
            foundElement = element.find(elementName)
            if foundElement is not None:
                text = foundElement.text
                seenValues = uniqueDict["Element_" + elementName]
                if text in seenValues:
                    logging.error(
                        _("Found {!r} data repeated in {!r} element (record {}) in {}"
                          ).format(text, elementName, k, systemName))
                seenValues.append(text)