def __validateAndExtractBook(self, book, bookNumber):
    """
    Check/validate and extract book data from the given XML book record,
    finding the chapter subelements.

    The book code (BBB) is first looked up from the "n" attribute of the
    book element (retrying with accents removed if that lookup gives None).
    It is then compared with the code obtained from bookNumber, and the
    number-derived code is used whenever the two differ.

    Parameters:
        book: the XML book element; its attributes are read via .items()
            and its children are iterated as chapter elements.
        bookNumber: passed to getBBBFromReferenceNumber() to cross-check
            (and if necessary override) the name-derived book code.

    Returns:
        None.  If a book code is determined, a BibleBook is created, filled
        via self.__validateAndExtractChapter() and passed to
        self.stashBook().  Otherwise an error is logged and nothing is
        stashed.
    """
    vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

    # Process the div attributes first
    BBB = bookName = None
    for attrib, value in book.items():
        if attrib == "n":
            bookName = value
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrib, value))

    if bookName:
        BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            # Only attempt the accent-stripped retry when there is a name
            #   to strip (a missing "n" attribute leaves bookName as None)
            adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText(adjustedBookName)

    # Just double check using the book number -- the number wins
    BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
        bookNumber)
    if BBB2 != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            vPrint(
                'Quiet', debuggingThisModule,
                "Assuming that book {} {!r} is {} (not {})".format(
                    bookNumber, bookName, BBB2, BBB))
        BBB = BBB2

    if not BBB:
        # Don't drop a whole book without leaving any trace in the log
        logging.error(
            "VerseView load doesn't recognize book {} (name: {!r})".format(
                bookNumber, bookName))
        return

    vPrint('Info', debuggingThisModule,
           _("Validating {} {}…").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'VerseView XML Bible Book object'
    thisBook.objectTypeString = 'VerseView'
    for element in book:
        if element.tag == VerseViewXMLBible.chapterTag:
            sublocation = "chapter in {}".format(BBB)
            BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, element)
        else:
            logging.error(
                "vb26 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.chapterTag, element.tag))

    vPrint('Info', debuggingThisModule,
           " Saving {} into results…".format(BBB))
    self.stashBook(thisBook)
def loadSystems(self, XMLFolder=None):
    """
    Load and pre-process the specified book order systems.

    Every file in XMLFolder whose extension is .xml and whose name starts
    with "BIBLEBOOKORDER_" (both compared case-insensitively) is parsed.
    The remainder of the file name (without extension) becomes the system
    code, and self._XMLSystems[code] is filled with:
        'tree'    -- the parsed root element, with the header removed
        'header'  -- the header element (if one was found)
        'version', 'date', 'title' -- text taken from the header's work
                     element (if present)

    Parameters:
        XMLFolder: folder to scan.  If None, the 'BookOrders/' folder under
            BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH is used.

    Returns:
        self (so calls can be chained).

    The load only ever happens once: if systems are already loaded, a
    request for a different folder is logged as an error and ignored.
    """
    if self._XMLSystems:  # The data must have been already loaded
        if XMLFolder is not None and XMLFolder != self.__XMLFolder:
            # Report the folder that was asked for (and ignored), not the
            #   one that is already in use
            logging.error(
                _("Bible book order systems are already loaded -- your different folder of {!r} was ignored"
                  ).format(XMLFolder))
        return self

    # Only ever do this once
    if XMLFolder is None:
        # Relative to module, not cwd
        XMLFolder = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath(
            'BookOrders/')
    self.__XMLFolder = XMLFolder
    vPrint('Info', debuggingThisModule,
           _("Loading book order systems from {}…").format(self.__XMLFolder))

    filenamePrefix = "BIBLEBOOKORDER_"
    for filename in os.listdir(self.__XMLFolder):
        filepart, extension = os.path.splitext(filename)
        if extension.upper() != '.XML' \
        or not filepart.upper().startswith(filenamePrefix):
            continue

        bookOrderSystemCode = filepart[len(filenamePrefix):]
        vPrint('Verbose', debuggingThisModule,
               _(" Loading{} book order system from {}…").format(
                   bookOrderSystemCode, filename))
        system = self._XMLSystems[bookOrderSystemCode] = {}
        tree = system['tree'] = ElementTree().parse(
            os.path.join(self.__XMLFolder, filename))
        # Fail here if we didn't load anything at all
        #   (len() avoids the deprecated truth-test of an Element)
        assert len(tree) > 0

        # Check and remove the header element
        if tree.tag == self.XMLTreeTag:
            header = tree[0]
            if header.tag == self.headerTag:
                system['header'] = header
                tree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, 'header')
                BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
                BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        work, "work in header")
                    if work.tag == "work":
                        system['version'] = work.find('version').text
                        system['date'] = work.find('date').text
                        system['title'] = work.find('title').text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                logging.warning(
                    _("Missing header element (looking for {!r} tag)").format(
                        self.headerTag))
        else:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self.XMLTreeTag, tree.tag))

        # Each remaining child of the root counts as one book entry
        bookCount = len(tree)
        vPrint('Info', debuggingThisModule,
               _(" Loaded {} books for {}").format(
                   bookCount, bookOrderSystemCode))
        logging.info(
            _(" Loaded {} books for {}").format(
                bookCount, bookOrderSystemCode))

        if BibleOrgSysGlobals.strictCheckingFlag:
            self.__validateSystem(tree, bookOrderSystemCode)
    return self
def __load(self, XMLFileOrFilepath):
    """
    Load the source XML file and remove the header from the tree.
    Also extracts some useful elements from the header element.

    Parameters:
        XMLFileOrFilepath: passed straight to ElementTree().parse(); must
            be truthy.

    Side effects:
        Sets self._XMLTree to the parsed root element.  If the first child
        is the expected header it is detached and stored in self.XMLheader,
        and self.PROGRAM_VERSION, self.dateString and self.titleString are
        set from the header's work element (when present).

    Problems with the structure are logged rather than raised; the asserts
    guard against loading twice and against an empty document.
    """
    assert XMLFileOrFilepath
    self.__XMLFileOrFilepath = XMLFileOrFilepath
    # Make sure we're not doing this twice
    assert self._XMLTree is None or len(self._XMLTree) == 0

    vPrint('Info', debuggingThisModule,
           _("Loading BibleReferencesLinks XML file from {!r}…").format(
               self.__XMLFileOrFilepath))
    self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath)
    # Fail here if we didn't load anything at all
    #   (len() avoids the deprecated truth-test of an Element)
    assert len(self._XMLTree) > 0

    if self._XMLTree.tag != self._treeTag:
        logging.error(
            _("Expected to load {!r} but got {!r}").format(
                self._treeTag, self._XMLTree.tag))
        return

    header = self._XMLTree[0]
    if header.tag == self._headerTag:
        self.XMLheader = header
        self._XMLTree.remove(header)
        BibleOrgSysGlobals.checkXMLNoText(header, 'header')
        BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
        BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
        if len(header) > 1:
            logging.info(_("Unexpected elements in header"))
        elif len(header) == 0:
            logging.info(_("Missing work element in header"))
        else:
            work = header[0]
            BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
            BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
            BibleOrgSysGlobals.checkXMLNoAttributes(work, "work in header")
            if work.tag == "work":
                self.PROGRAM_VERSION = work.find('version').text
                self.dateString = work.find('date').text
                self.titleString = work.find('title').text
            else:
                logging.warning(_("Missing work element in header"))
    else:
        # Translate the template first and format afterwards, so that the
        #   translation lookup sees the constant message string
        logging.warning(
            _("Missing header element (looking for {!r} tag)").format(
                self._headerTag))
    if header.tail is not None and header.tail.strip():
        logging.error(
            _("Unexpected {!r} tail data after header").format(header.tail))
def validateEntries(self, segment) -> None:
    """
    Validate the container element holding the Strongs lexicon entries.

    The container is checked for having no text, no tail and no attributes.
    self.StrongsEntries is then reset to an empty dict and every child
    whose tag is 'entry' is handed to self.validateEntry(); children with
    any other tag are skipped.
    """
    containerTag = segment.tag
    if BibleOrgSysGlobals.debugFlag:
        assert containerTag == 'entries'

    BibleOrgSysGlobals.checkXMLNoText(segment, containerTag, "kw99")
    BibleOrgSysGlobals.checkXMLNoTail(segment, containerTag, "ls90")
    BibleOrgSysGlobals.checkXMLNoAttributes(segment, containerTag, "hsj2")

    self.StrongsEntries = {}
    for child in segment:
        if child.tag != 'entry':
            continue
        self.validateEntry(child)
def load(self):
    """
    Parse the XML file at self.sourceFilepath and extract its books.

    The root element must carry the OpenSongXMLBible.treeTag tag, otherwise
    an error is logged and no books are extracted.  Each child with the
    book tag is validated and passed to self.__validateAndExtractBook();
    'OT' and 'NT' children are silently accepted and any other child is
    logged as an error.  self.doPostLoadProcessing() is always called last.
    """
    vPrint('Info', debuggingThisModule,
           _("Loading {}…").format(self.sourceFilepath))
    self.XMLTree = ElementTree().parse(self.sourceFilepath)
    if BibleOrgSysGlobals.debugFlag:
        assert self.XMLTree  # Fail here if we didn't load anything at all

    root = self.XMLTree
    if root.tag != OpenSongXMLBible.treeTag:
        logging.error("Expected to load {!r} but got {!r}".format(
            OpenSongXMLBible.treeTag, root.tag))
    else:
        # This is the main (bible) container
        location = "XML file"
        BibleOrgSysGlobals.checkXMLNoText(root, location, '4f6h')
        BibleOrgSysGlobals.checkXMLNoTail(root, location, '1wk8')

        # The "n" and "sn" attributes are recognised (but not used any
        #   further within this method) -- anything else gets a warning
        name = shortName = None
        for key, text in root.items():
            if key == "n":
                name = text
            elif key == "sn":
                shortName = text
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in main element".format(
                        key, text))

        # Now walk through the submain (book) containers
        for child in root:
            childTag = child.tag
            if childTag == OpenSongXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'g3g5')
                BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'd3f6')
                self.__validateAndExtractBook(child)
            elif childTag in ('OT', 'NT'):
                pass  # Testament markers are accepted but not processed
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.bookTag, childTag))

    self.doPostLoadProcessing()
def _validate(self):
    """
    Check/validate the loaded data.

    Every child of self._XMLTree is expected to carry self._mainElementTag
    (anything else is logged as unexpected).  For each main element this
    checks that it has no text, tail or subelements, and then checks its
    attributes against self._compulsoryAttributes,
    self._optionalAttributes and self._uniqueAttributes.

    All problems are reported through logging; nothing is returned.
    """
    # len() avoids the deprecated truth-test of an Element
    assert self._XMLTree is not None and len(self._XMLTree) > 0

    # One set of already-seen values per must-be-unique attribute
    #   (sets give constant-time membership tests)
    seenValues = {attributeName: set()
                  for attributeName in self._uniqueAttributes}

    for j, element in enumerate(self._XMLTree):
        if element.tag != self._mainElementTag:
            logging.warning("Unexpected element: {} in record {}".format(
                element.tag, j))
            continue

        BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self._compulsoryAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    "Compulsory {!r} attribute is missing from {} element in record {}"
                    .format(attributeName, element.tag, j))
            elif not attributeValue and attributeName != "type":
                # A missing attribute is reported above only -- "blank"
                #   means present but empty ("type" is allowed to be empty)
                logging.warning(
                    "Compulsory {!r} attribute is blank on {} element in record {}"
                    .format(attributeName, element.tag, j))

        # Check optional attributes on this main element
        for attributeName in self._optionalAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    "Optional {!r} attribute is blank on {} element in record {}"
                    .format(attributeName, element.tag, j))

        # Check for unexpected additional attributes on this main element
        for attributeName in element.keys():
            attributeValue = element.get(attributeName)
            if attributeName not in self._compulsoryAttributes \
            and attributeName not in self._optionalAttributes:
                logging.warning(
                    "Additional {!r} attribute ({!r}) found on {} element in record {}"
                    .format(attributeName, attributeValue, element.tag, j))

        # Check the attributes that must contain unique information
        #   (in that particular field -- doesn't check across different
        #   attributes).  "reference_name" is exempted from this check.
        for attributeName in self._uniqueAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and attributeName != "reference_name":
                if attributeValue in seenValues[attributeName]:
                    logging.error(
                        "Found {!r} data repeated in {!r} field on {} element in record {}"
                        .format(attributeValue, attributeName, element.tag, j))
                seenValues[attributeName].add(attributeValue)
def load(self):
    """
    Load a single VerseView source XML file and extract its book elements.

    The file at self.sourceFilepath is parsed into self.XMLTree.  Under a
    root carrying VerseViewXMLBible.treeTag, the revision, title, font and
    copyright elements are copied into self.suppliedMetadata['VerseView']
    (keys 'Revision', 'Title', 'Font', 'Copyright'), the filename and
    sizefactor elements are only checked, and every book element is passed
    (with its 1-based running number) to self.__validateAndExtractBook().
    Unexpected elements or an unexpected root tag are logged as errors.

    Finally self.applySuppliedMetadata('VerseView') and
    self.doPostLoadProcessing() are called.
    """
    def checkSimpleElement(element, sublocation):
        """Check that a metadata element has no attributes, subelements or tail."""
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
        BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
        BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
    # end of checkSimpleElement

    vPrint('Info', debuggingThisModule,
           _("Loading {}…").format(self.sourceFilepath))
    self.XMLTree = ElementTree().parse(self.sourceFilepath)
    if BibleOrgSysGlobals.debugFlag:
        assert self.XMLTree  # Fail here if we didn't load anything at all

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    metadata = self.suppliedMetadata['VerseView'] = {}

    # Find the main (bible) container
    if self.XMLTree.tag == VerseViewXMLBible.treeTag:
        location = "VerseView XML file"
        BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
        BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
        BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

        # Find the submain (various info and then book) containers
        bookNumber = 0
        for element in self.XMLTree:
            tag = element.tag
            if tag == VerseViewXMLBible.filenameTag:
                checkSimpleElement(element, "filename in " + location)
            elif tag == VerseViewXMLBible.revisionTag:
                checkSimpleElement(element, "revision in " + location)
                metadata['Revision'] = element.text
            elif tag == VerseViewXMLBible.titleTag:
                checkSimpleElement(element, "title in " + location)
                metadata['Title'] = element.text
            elif tag == VerseViewXMLBible.fontTag:
                checkSimpleElement(element, "font in " + location)
                metadata['Font'] = element.text
            elif tag == VerseViewXMLBible.copyrightTag:
                checkSimpleElement(element, "copyright in " + location)
                metadata['Copyright'] = element.text
            elif tag == VerseViewXMLBible.sizefactorTag:
                checkSimpleElement(element, "sizefactor in " + location)
                if BibleOrgSysGlobals.debugFlag:
                    assert element.text == '1'
            elif tag == VerseViewXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                bookNumber += 1
                self.__validateAndExtractBook(element, bookNumber)
            else:
                logging.error(
                    "xk15 Expected to find {!r} but got {!r}".format(
                        VerseViewXMLBible.bookTag, tag))
    else:
        logging.error("Expected to load {!r} but got {!r}".format(
            VerseViewXMLBible.treeTag, self.XMLTree.tag))

    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        # These should all exist in a well-formed file, but use .get() so
        #   that a file lacking one of them (or with the wrong root tag)
        #   shows None here instead of raising KeyError
        for key in ('Revision', 'Title', 'Font', 'Copyright'):
            vPrint('Quiet', debuggingThisModule,
                   "{} is {!r}".format(key, metadata.get(key)))

    self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def importDataToPython(self):
    """
    Load (and pivot) the data (not including the header) into Python
    containers suitable for use in a Python program.
    (Of course, you can just use the elementTree in self._XMLTree if you
    prefer.)

    Returns:
        A 2-tuple (dataList, dataDict), which is also cached on self so
        that later calls return immediately.
          * dataList has one tuple per XML record:
                (sourceReference, sourceComponent, parsedSourceReference,
                 actualLinksList)
            where actualLinksList contains tuples of
                (targetReference, targetComponent, parsedTargetReference,
                 linkType)
            and parsedTargetReference is None when FlexibleVersesKey
            raised TypeError for that target.
          * dataDict maps each verse key yielded by getIncludedVerses() to
            a list of entries.  Forward entries have the dataList shape;
            reverse entries are
                (targetReference, targetComponent, parsedTargetReference,
                 [(sourceReference, sourceComponent, parsedSourceReference,
                   reverseLinkType)])

    Raises:
        TypeError: if a reference whose component is not 'Verse' can
            nevertheless be built as a SimpleVerseKey.
        ValueError: if a link type has no known reverse link type.
    """
    reverseLinkTypes = {
        'TSK': 'TSKQuoted',
        'QuotedOTReference': 'OTReferenceQuoted',
        'AlludedOTReference': 'OTReferenceAlluded',
        'PossibleOTReference': 'OTReferencePossible',
    }

    def checkReferenceMatchesComponent(component, reference):
        """
        Sanity check done before the real parsing: a 'Verse' reference is
            built as a SimpleVerseKey (any exception propagates), while
            for any other component that construction is expected to
            raise TypeError.
        """
        if component == 'Verse':
            SimpleVerseKey(reference)
            return
        try:
            SimpleVerseKey(reference, ignoreParseErrors=True)
        except TypeError:
            return  # This should happen coz it should fail the SVK
        logging.error("{} {!r} failed!".format(component, reference))
        raise TypeError(
            "{} reference {!r} unexpectedly parsed as a single verse".format(
                component, reference))
    # end of checkReferenceMatchesComponent

    assert self._XMLTree
    if self.__DataList:
        # We've already done an import/restructuring -- no need to repeat it
        return self.__DataList, self.__DataDict

    # First pass: pull the raw strings out of the XML
    rawRefLinkList = []
    actualLinkCount = 0
    for element in self._XMLTree:
        # Get these first for helpful error messages
        sourceReference = element.find('sourceReference').text
        sourceComponent = element.find('sourceComponent').text
        assert sourceComponent in ('Section', 'Verses', 'Verse',)

        BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference, 'kd21')
        BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

        actualRawLinksList = []
        for subelement in element:
            if subelement.tag in ('sourceReference', 'sourceComponent',):
                # The values were already fetched above
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    subelement, sourceReference, 'ls12')
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    subelement, sourceReference, 'ks02')
                BibleOrgSysGlobals.checkXMLNoTail(
                    subelement, sourceReference, 'sqw1')
            elif subelement.tag == 'BibleReferenceLink':
                BibleOrgSysGlobals.checkXMLNoText(
                    subelement, sourceReference, 'haw9')
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    subelement, sourceReference, 'hs19')
                BibleOrgSysGlobals.checkXMLNoTail(
                    subelement, sourceReference, 'jsd9')
                targetReference = subelement.find('targetReference').text
                targetComponent = subelement.find('targetComponent').text
                assert targetComponent in ('Section', 'Verses', 'Verse',)
                linkType = subelement.find('linkType').text
                assert linkType in ('TSK', 'QuotedOTReference',
                                    'AlludedOTReference', 'PossibleOTReference',)
                actualRawLinksList.append(
                    (targetReference, targetComponent, linkType,))
                actualLinkCount += 1
        rawRefLinkList.append(
            (sourceReference, sourceComponent, actualRawLinksList,))

    vPrint(
        'Normal', debuggingThisModule,
        f" {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)"
    )

    # Second pass: parse the reference strings into key objects
    myRefLinkList = []
    actualLinkCount = 0
    # NOTE(review): BOS is not used below; kept in case constructing it has
    #   side effects that the key classes rely on -- confirm and remove
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    for j, (sourceReference, sourceComponent, actualRawLinksList) \
    in enumerate(rawRefLinkList):
        # Just do some testing first
        checkReferenceMatchesComponent(sourceComponent, sourceReference)

        # Now do the actual parsing
        parsedSourceReference = FlexibleVersesKey(sourceReference)
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            vPrint('Quiet', debuggingThisModule, j, sourceComponent,
                   sourceReference, parsedSourceReference)

        actualLinksList = []
        for targetReference, targetComponent, linkType in actualRawLinksList:
            # Just do some testing first
            checkReferenceMatchesComponent(targetComponent, targetReference)

            # Now do the actual parsing
            try:
                parsedTargetReference = FlexibleVersesKey(targetReference)
            except TypeError:
                logging.error(
                    " Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                    .format(targetReference))
                parsedTargetReference = None
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, ' ', targetComponent,
                       targetReference, parsedTargetReference)

            actualLinksList.append((targetReference, targetComponent,
                                    parsedTargetReference, linkType,))
            actualLinkCount += 1

        myRefLinkList.append((sourceReference, sourceComponent,
                              parsedSourceReference, actualLinksList,))

    vPrint(
        'Normal', debuggingThisModule,
        " {:,} links processed (with {:,} actual link entries)".format(
            len(rawRefLinkList), actualLinkCount))
    self.__DataList = myRefLinkList

    # Now put it into my dictionaries for easy access
    # This part should be customized or added to for however you need to
    #   process the data

    # Create a link dictionary (by verse key)
    myRefLinkDict = {}
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList \
    in myRefLinkList:
        for verseRef in parsedSourceReference.getIncludedVerses():
            assert isinstance(verseRef, SimpleVerseKey)
            myRefLinkDict.setdefault(verseRef, []).append(
                (sourceReference, sourceComponent,
                 parsedSourceReference, actualLinksList,))
    originalLinks = len(myRefLinkDict)
    vPrint(
        'Quiet', debuggingThisModule,
        " {:,} verse links added to dictionary (includes filling out spans)"
        .format(originalLinks))

    # Add the reversed links (by verse key) to the same dictionary
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList \
    in myRefLinkList:
        for targetReference, targetComponent, parsedTargetReference, linkType \
        in actualLinksList:
            if parsedTargetReference is None:
                continue  # This target could not be parsed above
            try:
                reverseLinkType = reverseLinkTypes[linkType]
            except KeyError:
                # Have a new linkType!
                raise ValueError(
                    "No reverse link type known for {!r}".format(linkType)
                ) from None
            for verseRef in parsedTargetReference.getIncludedVerses():
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append(
                    (targetReference, targetComponent, parsedTargetReference,
                     [(sourceReference, sourceComponent,
                       parsedSourceReference, reverseLinkType)]))
    totalLinks = len(myRefLinkDict)
    reverseLinks = totalLinks - originalLinks
    vPrint(
        'Quiet', debuggingThisModule,
        " {:,} reverse links added to dictionary to give {:,} total".format(
            reverseLinks, totalLinks))
    self.__DataDict = myRefLinkDict

    # Let's find the most number of references for a verse
    mostReferences = totalReferences = 0
    mostVerseRef = None  # Stays None if the dictionary is empty
    for verseRef, entryList in myRefLinkDict.items():
        numRefs = len(entryList)
        if numRefs > mostReferences:
            mostReferences, mostVerseRef = numRefs, verseRef
        totalReferences += numRefs
    if mostVerseRef is not None:
        vPrint(
            'Quiet', debuggingThisModule,
            " {:,} maximum links for any one reference ({})".format(
                mostReferences, mostVerseRef.getShortText()))
    vPrint('Quiet', debuggingThisModule,
           " {:,} total links for all references".format(totalReferences))

    return self.__DataList, self.__DataDict
def __validate(self):
    """
    Check/validate the loaded data.

    Every child of self._XMLTree is expected to carry self._mainElementTag
    (anything else is logged as unexpected).  For each main element the
    attributes are checked against self._compulsoryAttributes,
    self._optionalAttributes and self._uniqueAttributes, and the
    subelements against self._compulsoryElements, self._optionalElements
    and self._uniqueElements.  The text of the "sourceComponent"
    subelement is used as the record ID in messages (None if that
    subelement is absent).

    All problems are reported through logging; nothing is returned.
    """
    # len() avoids the deprecated truth-test of an Element
    assert self._XMLTree is not None and len(self._XMLTree) > 0

    # One set of already-seen values per must-be-unique element/attribute
    #   (sets give constant-time membership tests)
    seenElementTexts = {elementName: set()
                        for elementName in self._uniqueElements}
    seenAttributeValues = {attributeName: set()
                           for attributeName in self._uniqueAttributes}

    for j, element in enumerate(self._XMLTree):
        if element.tag == self._mainElementTag:
            BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self._compulsoryAttributes and not self._optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self._compulsoryElements and not self._optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}"
                          ).format(attributeName, element.tag, j))
                elif not attributeValue:
                    # A missing attribute is reported above only --
                    #   "blank" means present but empty
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, j))

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, j))

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get(attributeName)
                if attributeName not in self._compulsoryAttributes \
                and attributeName not in self._optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                          ).format(attributeName, attributeValue,
                                   element.tag, j))

            # Check the attributes that must contain unique information
            #   (in that particular field -- doesn't check across
            #   different attributes)
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if attributeValue in seenAttributeValues[attributeName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, j))
                    seenAttributeValues[attributeName].add(attributeValue)

            # Get the sourceComponent to use as a record ID
            #   (tolerate its absence so that the compulsory-element check
            #   below gets the chance to report it)
            IDElement = element.find("sourceComponent")
            ID = None if IDElement is None else IDElement.text

            # Check compulsory elements
            for elementName in self._compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                          ).format(elementName, ID, j))
                else:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail(foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        foundElement, sublocation)
                    if not foundElement.text:
                        logging.warning(
                            _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))

            # Check optional elements
            for elementName in self._optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail(foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        foundElement, sublocation)
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        foundElement, sublocation)
                    if not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self._compulsoryElements \
                and subelement.tag not in self._optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                          ).format(subelement.tag, subelement.text, ID, j))

            # Check the elements that must contain unique information
            #   (in that particular element -- doesn't check across
            #   different elements)
            for elementName in self._uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    if text in seenElementTexts[elementName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                              ).format(text, elementName, ID, j))
                    seenElementTexts[elementName].add(text)
        else:
            logging.warning(
                _("Unexpected element: {} in record {}").format(
                    element.tag, j))

        if element.tail is not None and element.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element in record {}"
                  ).format(element.tail, element.tag, j))

    if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
        logging.error(
            _("Unexpected {!r} tail data after {} element").format(
                self._XMLTree.tail, self._XMLTree.tag))
def __validateSystem(self, systemName):
    """
    Check for basic formatting/content errors in a Bible book name system.

    Parameters:
        systemName: key into self.__XMLSystems; that entry must have a
            non-empty 'tree' and a "languageCode".

    Each child of the tree whose tag is listed in self.mainElementTags is
    checked against the per-tag tables self.compulsoryAttributes,
    self.optionalAttributes, self.uniqueAttributes,
    self.compulsoryElements, self.optionalElements and
    self.uniqueElements (all indexed by the tag's position in
    self.mainElementTags).  Other children are logged as unexpected.

    All problems are reported through logging; nothing is returned.
    """
    assert systemName
    system = self.__XMLSystems[systemName]
    tree = system['tree']
    # len() avoids the deprecated truth-test of an Element
    assert tree is not None and len(tree) > 0

    if len(system["languageCode"]) != 3:
        logging.error(
            _("Couldn't find 3-letter language code in {!r} book names system"
              ).format(systemName))

    # One set of already-seen values per (tag index, name) pair
    #   (sets give constant-time membership tests)
    seenElementTexts = {}
    seenAttributeValues = {}
    for index in range(len(self.mainElementTags)):
        for elementName in self.uniqueElements[index]:
            seenElementTexts[index, elementName] = set()
        for attributeName in self.uniqueAttributes[index]:
            seenAttributeValues[index, attributeName] = set()

    for k, element in enumerate(tree):
        if element.tag not in self.mainElementTags:
            logging.warning(
                _("Unexpected element: {} in record {} in {}").format(
                    element.tag, k, systemName))
            continue

        BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        # NOTE(review): these two tests look at the whole per-tag tables
        #   rather than at the entries for this element's tag -- confirm
        #   that this is intended
        if not self.compulsoryAttributes and not self.optionalAttributes:
            BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
        if not self.compulsoryElements and not self.optionalElements:
            BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        index = self.mainElementTags.index(element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self.compulsoryAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    _("Compulsory {!r} attribute is missing from {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))
            elif not attributeValue:
                # A missing attribute is reported above only -- "blank"
                #   means present but empty
                logging.warning(
                    _("Compulsory {!r} attribute is blank on {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))

        # Check optional attributes on this main element
        for attributeName in self.optionalAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    _("Optional {!r} attribute is blank on {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))

        # Check for unexpected additional attributes on this main element
        for attributeName in element.keys():
            attributeValue = element.get(attributeName)
            if attributeName not in self.compulsoryAttributes[index] \
            and attributeName not in self.optionalAttributes[index]:
                logging.warning(
                    _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}"
                      ).format(attributeName, attributeValue, element.tag,
                               k, systemName))

        # Check the attributes that must contain unique information
        #   (in that particular field -- doesn't check across different
        #   attributes)
        for attributeName in self.uniqueAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is not None:
                if attributeValue in seenAttributeValues[index, attributeName]:
                    logging.error(
                        _("Found {!r} data repeated in {!r} field on {} element in record {} in {}"
                          ).format(attributeValue, attributeName,
                                   element.tag, k, systemName))
                seenAttributeValues[index, attributeName].add(attributeValue)

        # Check compulsory elements
        for elementName in self.compulsoryElements[index]:
            foundElement = element.find(elementName)
            if foundElement is None:
                logging.error(
                    _("Compulsory {!r} element is missing (record {}) in {}"
                      ).format(elementName, k, systemName))
            elif not foundElement.text:
                # Only look at .text once we know the element exists
                logging.warning(
                    _("Compulsory {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check optional elements
        for elementName in self.optionalElements[index]:
            foundElement = element.find(elementName)
            if foundElement is not None and not foundElement.text:
                logging.warning(
                    _("Optional {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check for unexpected additional elements
        for subelement in element:
            if subelement.tag not in self.compulsoryElements[index] \
            and subelement.tag not in self.optionalElements[index]:
                logging.warning(
                    _("Additional {!r} element ({!r}) found (record {}) in {} {}"
                      ).format(subelement.tag, subelement.text, k,
                               systemName, element.tag))

        # Check the elements that must contain unique information
        #   (in that particular element -- doesn't check across different
        #   elements)
        for elementName in self.uniqueElements[index]:
            foundElement = element.find(elementName)
            if foundElement is not None:
                text = foundElement.text
                if text in seenElementTexts[index, elementName]:
                    # Repeats are only informational for division names
                    myLogging = logging.info \
                        if element.tag == 'BibleDivisionNames' \
                        else logging.error
                    myLogging(
                        _("Found {!r} data repeated in {!r} element (record {}) in {}"
                          ).format(text, elementName, k, systemName))
                seenElementTexts[index, elementName].add(text)
def __validateAndExtractBook(self, book):
    """
    Validate one OpenSong XML book record and load its chapters.

    The book name is read from the record's "n" attribute (any other
    attribute is reported with a warning) and converted to a BBB code,
    first through self.genericBOS and then, failing that, through the
    module-level BibleBooksNames data, which is loaded on first use.

    For a recognised book, a BibleBook is created, given 'id', 'h' and
    'mt1' lines, filled by passing every chapter subelement to
    self.__validateAndExtractChapter(), and finally handed to
    self.stashBook().  A missing or unrecognised book name is logged as
    an error and nothing is stashed.
    """
    global BibleBooksNames

    vPrint('Verbose', debuggingThisModule,
           _("Validating OpenSong XML book…"))

    # Only the "n" attribute is understood -- report everything else
    bookName = None
    for attributeName, attributeValue in book.items():
        if attributeName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attributeName, attributeValue))
            continue
        bookName = attributeValue

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    # Original author's note: booknames are usually in English,
    #   so the generic lookup is tried before the names systems
    BBB = self.genericBOS.getBBBFromText(bookName)
    if not BBB:
        if BibleBooksNames is None:  # Load the names data once only
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        BBB = BibleBooksNames.getBBBFromText(bookName)

    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: {!r}").format(
                bookName))
        return

    vPrint('Info', debuggingThisModule,
           _("Validating {} {}…").format(BBB, bookName))
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'OpenSong XML Bible Book object'
    thisBook.objectTypeString = 'OpenSong'

    USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(
        BBB)
    if not USFMAbbreviation:
        logging.critical(f"Unable to find USFM abbreviation for '{BBB}'")
        if BibleOrgSysGlobals.strictCheckingFlag:
            # NOTE(review): bare name not defined in this block --
            #   presumably a deliberate abort in strict mode; confirm
            halt
        USFMAbbreviation = 'XXA'  # Placeholder so the load can continue

    # Header lines for the new book
    idLine = '{} imported by {}'.format(USFMAbbreviation.upper(),
                                        programNameVersion)
    thisBook.addLine('id', idLine)
    for marker in ('h', 'mt1'):
        thisBook.addLine(marker, bookName)

    # Now the chapters -- any other subelement is reported and skipped
    chapterLocation = "chapter in {}".format(BBB)
    for subelement in book:
        if subelement.tag != OpenSongXMLBible.chapterTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.chapterTag, subelement.tag))
            continue
        BibleOrgSysGlobals.checkXMLNoText(subelement, chapterLocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(subelement, chapterLocation, 'al1d')
        self.__validateAndExtractChapter(BBB, thisBook, subelement)

    vPrint('Info', debuggingThisModule,
           " Saving {} into results…".format(BBB))
    self.stashBook(thisBook)
def validateEntry(self, entry) -> None:
    """
    Check/validate the given Strongs Greek lexicon entry.

    The children of the <entry> record are processed in document order:
      * <strongs>        supplies the key used in self.StrongsEntries
      * first <greek>    stored as entryResults['word']
                            = (unicode, translit, BETA)
      * <pronunciation>  stored as entryResults['pronunciation'] while
                            the leading "essential" fields are being read
      * <see>            appended to entryResults['see'] as 'G…' / 'H…'
      * everything else  flattened and concatenated into one string,
                            stored as entryResults['Entry'] after any
                            <strongsref> elements have been rewritten
                            as <StrongsRef>Gnnn</StrongsRef> or
                            <StrongsRef>Hnnn</StrongsRef>

    Adds good entries to self.StrongsEntries.  An entry with no
        'strongs' attribute, or with a missing/empty <strongs> child,
        is logged as an error and not stored.

    Raises ValueError if the 'strongs' attribute is not numeric
        (other than via the '02717' special case).
    """
    if BibleOrgSysGlobals.debugFlag:
        assert entry.tag == 'entry'
    BibleOrgSysGlobals.checkXMLNoText(entry, entry.tag, "na19")
    BibleOrgSysGlobals.checkXMLNoTail(entry, entry.tag, "kaq9")

    # Process the entry attributes first
    strongs5 = None
    for attrib, value in entry.items():
        if attrib == 'strongs':
            strongs5 = value
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in main entry element".
                format(attrib, value))
    if strongs5 is None:
        # Everything below concatenates or converts this value,
        #   so without it the entry cannot be processed at all
        logging.error(
            "Missing 'strongs' attribute in main entry element -- entry ignored")
        return
    if BibleOrgSysGlobals.debugFlag:
        assert len(strongs5) == 5 and strongs5.isdigit()

    entryResults = {}
    entryString = ""
    strongs = None  # Set from the <strongs> child element
    gettingEssentials = True
    for j, element in enumerate(entry):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag:
                assert gettingEssentials and j == 0 and element.text
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, element.tag, "md3d")
            # Entry 02717 and entries 3203..3302 are exempt from the
            #   no-tail check (presumably they carry explanatory text
            #   after the number -- TODO confirm against the data file).
            # The previous test "3203 > n > 3302" could never be true,
            #   so the check was silently skipped for every entry.
            exemptFromTailCheck = (strongs5 == '02717'
                                   or 3203 <= int(strongs5) <= 3302)
            if not exemptFromTailCheck:
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag,
                                                  "f3g7")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, element.tag, "m56g")
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag:
                assert strongs5.endswith(strongs)
            # Any non-blank tail text becomes the start of the entry string
            if element.tail and element.tail.strip():
                entryString += element.tail.strip()
        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "jke0")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "df35")
            # Process the attributes
            translit = greek = beta = None
            for attrib, value in element.items():
                if attrib == "translit":
                    translit = value
                elif attrib == "unicode":
                    greek = value
                elif attrib == "BETA":
                    beta = value
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".
                        format(attrib, value, location))
            if BibleOrgSysGlobals.debugFlag:
                assert greek and translit and beta
            if 'word' not in entryResults:  # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j == 1
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
                entryResults['word'] = (greek, translit, beta)
            else:
                # A later <greek> is part of the running text, and also
                #   marks the end of the leading "essential" fields
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5)
        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "0s20")
            # Process the attributes
            pronunciation = None
            for attrib, value in element.items():
                if attrib == "strongs":
                    pronunciation = value
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".
                        format(attrib, value, location))
            if gettingEssentials:
                if BibleOrgSysGlobals.debugFlag:
                    assert j == 2
                    assert pronunciation
                    assert 'pronunciation' not in entryResults
                entryResults['pronunciation'] = pronunciation
            else:
                # Not the main pronunciation: only its tail text is kept
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2 and not gettingEssentials
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip().replace('\n', '')
        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
            derivation = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert derivation and '\t' not in derivation and '\n' not in derivation
            entryString += derivation
        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "jd28")
            definition = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert definition and '\t' not in definition and '\n' not in definition
            entryString += definition
        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes(
                element, location, "jke0")
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
            entryString += KJVdefinition
        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "kls2")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "ks24")
            strongsRef = BibleOrgSysGlobals.getFlattenedXML(
                element, strongs5).replace('\n', '')
            if BibleOrgSysGlobals.debugFlag:
                assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
            # Raw strings so that \d reaches the regex engine untouched.
            # Only GREEK references are rewritten at this point (the
            #   HEBREW equivalents were disabled in the original code).
            strongsRef = re.sub(r'<language="GREEK" strongs="(\d{1,5})">',
                                r'<StrongsRef>G\1</StrongsRef>', strongsRef)
            strongsRef = re.sub(r'<strongs="(\d{1,5})" language="GREEK">',
                                r'<StrongsRef>G\1</StrongsRef>', strongsRef)
            entryString += ' ' + strongsRef
        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
            BibleOrgSysGlobals.checkXMLNoTail(element, location, "kd02")
            BibleOrgSysGlobals.checkXMLNoSubelements(
                element, location, "0s20")
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib, value in element.items():
                if attrib == "language":
                    seeLanguage = value
                elif attrib == "strongs":
                    seeStrongsNumber = value  # Note: No leading zeroes here
                else:
                    logging.warning(
                        "scs4 Unprocessed {!r} attribute ({}) in {}".
                        format(attrib, value, location))
            if BibleOrgSysGlobals.debugFlag:
                assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                assert seeLanguage in ('GREEK', 'HEBREW')
            if 'see' not in entryResults:
                entryResults['see'] = []
            # Anything other than GREEK is given the 'H' prefix
            entryResults['see'].append(
                ('G' if seeLanguage == 'GREEK' else 'H') + seeStrongsNumber)
        else:
            logging.error(
                "2d4f Unprocessed {!r} element ({}) in entry".format(
                    element.tag, element.text))

    if entryString:
        if BibleOrgSysGlobals.debugFlag:
            assert '\t' not in entryString and '\n' not in entryString
        # Rewrite the flattened <strongsref> elements (either attribute
        #   order) into the compact <StrongsRef> form
        for pattern, replacement in (
            (r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
             r'<StrongsRef>G\1</StrongsRef>'),
            (r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
             r'<StrongsRef>G\1</StrongsRef>'),
            (r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
             r'<StrongsRef>H\1</StrongsRef>'),
            (r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
             r'<StrongsRef>H\1</StrongsRef>'),
        ):
            entryString = re.sub(pattern, replacement, entryString)
        if BibleOrgSysGlobals.debugFlag:
            assert 'strongsref' not in entryString
        entryResults['Entry'] = entryString

    if not strongs:
        # No usable <strongs> child means there is no key to store under
        #   (this used to fail with an unbound local variable instead)
        logging.error(
            "Missing or empty strongs element in Strongs {} entry -- entry ignored"
            .format(strongs5))
        return
    self.StrongsEntries[strongs] = entryResults