def _remove_old_items(channel: ElementTree) -> None: items = channel.findall("item") num_to_delete = max(0, len(items) - MAX_KEEP_ITEMS) elements_to_delete = items[:num_to_delete] for element in elements_to_delete: channel.remove(element)
def remove_simulation(session_path, simulation_name): """Removes the simulation from the session file and the session folder. Returns the state of the deleted simulation. """ deleted_state = SimulationState.Saved tree = ET().parse(session_path + '.xml') for simulation in tree.findall('simulation'): if (simulation.find('name').text == simulation_name): deleted_state = int(simulation.attrib['in_batch']) tree.remove(simulation) indent(tree) with open(session_path + '.xml', 'wb') as f: f.write(tostring(tree)) shutil.rmtree(session_path + os.sep + simulation_name) return deleted_state
class BibleReferencesLinksConverter: """ Class for reading, validating, and converting BibleReferencesLinks. This is only intended as a transitory class (used at start-up). The BibleReferencesLinks class has functions more generally useful. """ def __init__( self): # We can't give this parameters because of the singleton """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = 'BibleReferencesLinks' # These fields are used for parsing the XML self._treeTag = 'BibleReferencesLinks' self._headerTag = 'header' self._mainElementTag = 'BibleReferenceLinks' # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = () self._optionalAttributes = () self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes self._compulsoryElements = ( 'sourceReference', 'sourceComponent', 'BibleReferenceLink', ) self._optionalElements = () self._uniqueElements = ('sourceReference') # These are fields that we will fill later self._XMLheader, self._XMLTree = None, None self.__DataList = {} # Used for import self.titleString = self.PROGRAM_VERSION = self.dateString = '' # end of BibleReferencesLinksConverter.__init__ def loadAndValidate(self, XMLFileOrFilepath=None): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLTree is None: # We mustn't have already have loaded the data if XMLFileOrFilepath is None: # XMLFileOrFilepath = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '.xml' ) # Relative to module, not cwd import importlib.resources # From Python 3.7 onwards -- handles zipped resources also XMLFileOrFilepath = importlib.resources.open_text( 'BibleOrgSys.DataFiles', self._filenameBase + '.xml') self.__load(XMLFileOrFilepath) if BibleOrgSysGlobals.strictCheckingFlag: self.__validate() else: # The data must have been already loaded if XMLFileOrFilepath is not None and XMLFileOrFilepath != self.__XMLFileOrFilepath: logging.error( _("Bible references links are already loaded -- your different filepath of {!r} was ignored" ).format(XMLFileOrFilepath)) return self # end of BibleReferencesLinksConverter.loadAndValidate def __load(self, XMLFileOrFilepath): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert XMLFileOrFilepath self.__XMLFileOrFilepath = XMLFileOrFilepath assert self._XMLTree is None or len( self._XMLTree) == 0 # Make sure we're not doing this twice vPrint( 'Info', debuggingThisModule, _("Loading BibleReferencesLinks XML file from {!r}…").format( self.__XMLFileOrFilepath)) self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath) assert self._XMLTree # Fail here if we didn't load anything at all if self._XMLTree.tag == self._treeTag: header = self._XMLTree[0] if header.tag == self._headerTag: self.XMLheader = header self._XMLTree.remove(header) BibleOrgSysGlobals.checkXMLNoText(header, 'header') BibleOrgSysGlobals.checkXMLNoTail(header, 'header') BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header') if len(header) > 1: logging.info(_("Unexpected elements in header")) elif len(header) == 0: logging.info(_("Missing work element in header")) else: work = header[0] BibleOrgSysGlobals.checkXMLNoText(work, "work in header") BibleOrgSysGlobals.checkXMLNoTail(work, "work in header") BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header") if work.tag == "work": self.PROGRAM_VERSION = work.find('version').text self.dateString = work.find('date').text self.titleString = work.find('title').text else: logging.warning(_("Missing work element in header")) else: logging.warning( _("Missing header element (looking for {!r} tag)".format( self._headerTag))) if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( header.tail)) else: logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLTree.tag)) # end of BibleReferencesLinksConverter.__load def __validate(self): """ Check/validate the loaded data. """ assert self._XMLTree uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_" + elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_" + attributeName] = [] expectedID = 1 for j, element in enumerate(self._XMLTree): if element.tag == self._mainElementTag: BibleOrgSysGlobals.checkXMLNoText(element, element.tag) BibleOrgSysGlobals.checkXMLNoTail(element, element.tag) if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag) if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag) # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get(attributeName) if attributeValue is None: logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}" ).format(attributeName, element.tag, j)) if not attributeValue: logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional {!r} attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get(attributeName) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}" ).format(attributeName, attributeValue, element.tag, j)) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_" + attributeName]: logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}" ).format(attributeValue, attributeName, element.tag, j)) uniqueDict["Attribute_" + attributeName].append(attributeValue) # Get the sourceComponent to use as a record ID ID = element.find("sourceComponent").text # Check compulsory elements for elementName in self._compulsoryElements: foundElement = element.find(elementName) if foundElement is None: logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})" ).format(elementName, ID, j)) else: BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag) BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag) #BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag ) if not foundElement.text: logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})" ).format(elementName, ID, j)) # Check optional elements for elementName in self._optionalElements: foundElement = element.find(elementName) if foundElement is not None: BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag) BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag) BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag) if not foundElement.text: logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})" ).format(elementName, ID, j)) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})" ).format(subelement.tag, subelement.text, ID, j)) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find(elementName) is not None: text = element.find(elementName).text if text in uniqueDict["Element_" + elementName]: logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})" ).format(text, elementName, ID, j)) uniqueDict["Element_" + elementName].append(text) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j)) if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}" ).format(element.tail, element.tag, j)) if self._XMLTree.tail is not None and self._XMLTree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLTree.tail, self._XMLTree.tag)) # end of BibleReferencesLinksConverter.__validate def __str__(self) -> str: """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ indent = 2 result = "BibleReferencesLinksConverter object" if self.titleString: result += ('\n' if result else '') + ' ' * indent + _("Title: {}").format( self.titleString) if self.PROGRAM_VERSION: result += ('\n' if result else '') + ' ' * indent + _("Version: {}").format( self.PROGRAM_VERSION) if self.dateString: result += ('\n' if result else '' ) + ' ' * indent + _("Date: {}").format(self.dateString) if self._XMLTree is not None: result += ('\n' if result else '') + ' ' * indent + _( "Number of entries = {:,}").format(len(self._XMLTree)) return result # end of BibleReferencesLinksConverter.__str__ def __len__(self): """ Returns the number of references links loaded. """ return len(self._XMLTree) # end of BibleReferencesLinksConverter.__len__ def importDataToPython(self): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLTree if you prefer.) """ def makeList(parameter1, parameter2): """ Returns a list containing all parameters. Parameter1 may already be a list. """ if isinstance(parameter1, list): #assert parameter2 not in parameter1 parameter1.append(parameter2) return parameter1 else: return [parameter1, parameter2] # end of makeList assert self._XMLTree if self.__DataList: # We've already done an import/restructuring -- no need to repeat it return self.__DataList, self.__DataDict # We'll create a number of dictionaries with different elements as the key rawRefLinkList = [] actualLinkCount = 0 for element in self._XMLTree: #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( element ) ) # Get these first for helpful error messages sourceReference = element.find('sourceReference').text sourceComponent = element.find('sourceComponent').text assert sourceComponent in ( 'Section', 'Verses', 'Verse', ) BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1') BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference, 'kd21') BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20') actualRawLinksList = [] for subelement in element: #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( subelement ) ) if subelement.tag in ( 'sourceReference', 'sourceComponent', ): # already processed these BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'ls12') BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sourceReference, 'ks02') BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'sqw1') elif subelement.tag == 'BibleReferenceLink': BibleOrgSysGlobals.checkXMLNoText(subelement, sourceReference, 'haw9') BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'hs19') BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'jsd9') targetReference = subelement.find('targetReference').text targetComponent = subelement.find('targetComponent').text assert targetComponent in ( 'Section', 'Verses', 'Verse', ) linkType = subelement.find('linkType').text assert linkType in ( 'TSK', 'QuotedOTReference', 'AlludedOTReference', 'PossibleOTReference', ) actualRawLinksList.append(( targetReference, targetComponent, linkType, )) actualLinkCount += 1 rawRefLinkList.append(( sourceReference, sourceComponent, actualRawLinksList, )) vPrint( 'Normal', debuggingThisModule, f" {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)" ) myRefLinkList = [] actualLinkCount = 0 BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG') for j, (sourceReference, sourceComponent, actualRawLinksList) in enumerate(rawRefLinkList): # Just do some testing first if sourceComponent == 'Verse': x = SimpleVerseKey(sourceReference) else: flag = False try: x = SimpleVerseKey(sourceReference, ignoreParseErrors=True) flag = True except TypeError: pass # This should happen coz it should fail the SVK if flag: logging.error("{} {!r} failed!".format( sourceComponent, sourceReference)) raise TypeError # Now do the actual parsing parsedSourceReference = FlexibleVersesKey(sourceReference) if BibleOrgSysGlobals.debugFlag and debuggingThisModule: vPrint('Quiet', debuggingThisModule, j, sourceComponent, sourceReference, parsedSourceReference) #assert parsedSourceReference.getShortText().replace(' ','_') == sourceReference actualLinksList = [] for k, (targetReference, targetComponent, linkType) in enumerate(actualRawLinksList): # Just do some testing first if targetComponent == 'Verse': x = SimpleVerseKey(targetReference) else: flag = False try: x = SimpleVerseKey(targetReference, ignoreParseErrors=True) flag = True except TypeError: pass # This should happen coz it should fail the SVK if flag: logging.error("{} {!r} failed!".format( targetComponent, targetReference)) raise TypeError # Now do the actual parsing try: parsedTargetReference = FlexibleVersesKey(targetReference) except TypeError: logging.error( " Temporarily ignored {!r} (TypeError from FlexibleVersesKey)" .format(targetReference)) parsedTargetReference = None if BibleOrgSysGlobals.debugFlag and debuggingThisModule: vPrint('Quiet', debuggingThisModule, ' ', targetComponent, targetReference, parsedTargetReference) #assert parsedTargetReference.getShortText().replace(' ','_',1) == targetReference actualLinksList.append(( targetReference, targetComponent, parsedTargetReference, linkType, )) actualLinkCount += 1 myRefLinkList.append(( sourceReference, sourceComponent, parsedSourceReference, actualLinksList, )) vPrint( 'Normal', debuggingThisModule, " {:,} links processed (with {:,} actual link entries)".format( len(rawRefLinkList), actualLinkCount)) #vPrint( 'Quiet', debuggingThisModule, myRefLinkList ); halt self.__DataList = myRefLinkList # Now put it into my dictionaries for easy access # This part should be customized or added to for however you need to process the data # Create a link dictionary (by verse key) myRefLinkDict = {} for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList: #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference ) #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList ) for verseRef in parsedSourceReference.getIncludedVerses(): #vPrint( 'Quiet', debuggingThisModule, verseRef ) assert isinstance(verseRef, SimpleVerseKey) if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = [] myRefLinkDict[verseRef].append(( sourceReference, sourceComponent, parsedSourceReference, actualLinksList, )) #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt originalLinks = len(myRefLinkDict) vPrint( 'Quiet', debuggingThisModule, " {:,} verse links added to dictionary (includes filling out spans)" .format(originalLinks)) #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt # Create a reversed link dictionary (by verse key) for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList: #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference ) #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList ) for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList: if parsedTargetReference is not None: for verseRef in parsedTargetReference.getIncludedVerses(): #vPrint( 'Quiet', debuggingThisModule, verseRef ) assert isinstance(verseRef, SimpleVerseKey) if linkType == 'TSK': reverseLinkType = 'TSKQuoted' elif linkType == 'QuotedOTReference': reverseLinkType = 'OTReferenceQuoted' elif linkType == 'AlludedOTReference': reverseLinkType = 'OTReferenceAlluded' elif linkType == 'PossibleOTReference': reverseLinkType = 'OTReferencePossible' else: halt # Have a new linkType! if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = [] myRefLinkDict[verseRef].append( (targetReference, targetComponent, parsedTargetReference, [ (sourceReference, sourceComponent, parsedSourceReference, reverseLinkType) ])) #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt totalLinks = len(myRefLinkDict) reverseLinks = totalLinks - originalLinks vPrint( 'Quiet', debuggingThisModule, " {:,} reverse links added to dictionary to give {:,} total". format(reverseLinks, totalLinks)) #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt self.__DataDict = myRefLinkDict # Let's find the most number of references for a verse mostReferences = totalReferences = 0 for verseRef, entryList in self.__DataDict.items(): numRefs = len(entryList) if numRefs > mostReferences: mostReferences, mostVerseRef = numRefs, verseRef totalReferences += numRefs vPrint( 'Quiet', debuggingThisModule, " {:,} maximum links for any one reference ({})".format( mostReferences, mostVerseRef.getShortText())) vPrint('Quiet', debuggingThisModule, " {:,} total links for all references".format(totalReferences)) return self.__DataList, self.__DataDict # end of BibleReferencesLinksConverter.importDataToPython def pickle(self, filepath=None): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert self._XMLTree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + '_Tables.pickle') vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wb') as myFile: pickle.dump(self.__DataList, myFile) pickle.dump(self.__DataDict, myFile) # end of BibleReferencesLinksConverter.pickle def exportDataWithIndex(self, filepath=None): """ Writes the information tables to a .pickle index file and .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import pickle assert self._XMLTree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) indexFilepath = os.path.join( folder, self._filenameBase + '_Tables.index.pickle') dataFilepath = os.path.join( folder, self._filenameBase + '_Tables.data.pickle') vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format(dataFilepath)) index = {} filePosition = 0 with open(dataFilepath, 'wb') as myFile: for vKey, refList in self.__DataDict.items(): #vPrint( 'Quiet', debuggingThisModule, "vKey", vKey, vKey.getVerseKeyText() ) #vPrint( 'Quiet', debuggingThisModule, " ", refList ) length = myFile.write(pickle.dumps(refList)) #vPrint( 'Quiet', debuggingThisModule, " ", filePosition, length ) assert vKey not in index index[vKey] = (filePosition, length) filePosition += length with open(indexFilepath, 'wb') as myFile: pickle.dump(index, myFile) # end of BibleReferencesLinksConverter.exportDataWithIndex def exportDataToPython(self, filepath=None): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDictOrList(theFile, theDictOrList, dictName, keyComment, fieldsComment): """Exports theDictOrList to theFile.""" assert theDictOrList raise Exception("Not written yet") for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len(theDict[dictKey]) break # We only check the first (random) entry we get theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment)) for dictKey in sorted(theDict.keys()): theFile.write(' {}: {},\n'.format(repr(dictKey), repr(theDict[dictKey]))) theFile.write("}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDictOrList assert self._XMLTree self.importDataToPython() assert self.__DataList assert self.__DataDict vPrint('Quiet', debuggingThisModule, "Export to Python not written yet!") halt if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + '_Tables.py') vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wt', encoding='utf-8') as myFile: myFile.write("# {}\n#\n".format(filepath)) myFile.write( "# This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n#\n" .format(PROGRAM_VERSION, datetime.now())) if self.titleString: myFile.write("# {} data\n".format(self.titleString)) if self.PROGRAM_VERSION: myFile.write("# Version: {}\n".format(self.PROGRAM_VERSION)) if self.dateString: myFile.write("# Date: {}\n#\n".format(self.dateString)) myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLTree), self._treeTag)) mostEntries = "0=referenceNumber (integer 1..255), 1=sourceComponent/BBB (3-uppercase characters)" dictInfo = { "referenceNumberDict": ("referenceNumber (integer 1..255)", "specified"), "sourceComponentDict": ("sourceComponent", "specified"), "sequenceList": ("sourceComponent/BBB (3-uppercase characters)", ""), "initialAllAbbreviationsDict": ("allAbbreviations", mostEntries) } for dictName, dictData in self.__DataList.items(): exportPythonDictOrList(myFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1]) myFile.write("# end of {}".format(os.path.basename(filepath))) # end of BibleReferencesLinksConverter.exportDataToPython def exportDataToJSON(self, filepath=None): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert self._XMLTree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + '_Tables.json') vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wt', encoding='utf-8') as myFile: for something in self.__DataList: # temp for debugging … xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vPrint('Quiet', debuggingThisModule, "Dumping something", something) json.dump(something, myFile, indent=2) json.dump(self.__DataList, myFile, indent=2) for someKey, someItem in self.__DataDict.items( ): # temp for debugging … xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vPrint('Quiet', debuggingThisModule, "Dumping someKey", someKey) json.dump(someKey, myFile, indent=2) vPrint('Quiet', debuggingThisModule, "Dumping someItem", someItem) json.dump(someItem, myFile, indent=2) json.dump(self.__DataDict, myFile, indent=2) # end of BibleReferencesLinksConverter.exportDataToJSON def exportDataToC(self, filepath=None): """ Writes the information tables to a .h and .c files that can be included in c and c++ programs. NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically. """ def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy, structure): """ Exports theDict to the .h and .c files. """ def convertEntry(entry): """ Convert special characters in an entry… """ result = "" if isinstance(entry, str): result = entry elif isinstance(entry, tuple): for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) elif isinstance(field, list): raise Exception("Not written yet (list1)") else: logging.error( _("Cannot convert unknown field type {!r} in tuple entry {!r}" ).format(field, entry)) elif isinstance(entry, dict): for key in sorted(entry.keys()): field = entry[key] if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) elif isinstance(field, list): raise Exception("Not written yet (list2)") else: logging.error( _("Cannot convert unknown field type {!r} in dict entry {!r}" ).format(field, entry)) else: logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry))) return result # end of convertEntry for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count break # We only check the first (random) entry we get #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) ) hFile.write("typedef struct {}EntryStruct {{\n".format(dictName)) for declaration in structure.split(';'): adjDeclaration = declaration.strip() if adjDeclaration: hFile.write(" {};\n".format(adjDeclaration)) hFile.write("}} {}Entry;\n\n".format(dictName)) cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n" .format(dictName, dictName, len(theDict), fieldsCount, structure, sortedBy)) for dictKey in sorted(theDict.keys()): if isinstance(dictKey, str): cFile.write(" {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) elif isinstance(dictKey, int): cFile.write(" {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) else: logging.error( _("Can't handle this type of key data yet: {}").format( dictKey)) cFile.write("]}}; // {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict assert self._XMLTree self.importDataToPython() assert self.__DataList vPrint('Quiet', debuggingThisModule, "Export to C not written yet!") halt if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + '_Tables') hFilepath = filepath + '.h' cFilepath = filepath + '.c' vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format( cFilepath)) # Don't bother telling them about the .h file ifdefName = self._filenameBase.upper() + "_Tables_h" with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \ open( cFilepath, 'wt', encoding='utf-8' ) as myCFile: myHFile.write("// {}\n//\n".format(hFilepath)) myCFile.write("// {}\n//\n".format(cFilepath)) lines = "// This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n//\n".format( PROGRAM_VERSION, datetime.now()) myHFile.write(lines) myCFile.write(lines) if self.titleString: lines = "// {} data\n".format(self.titleString) myHFile.write(lines) myCFile.write(lines) if self.PROGRAM_VERSION: lines = "// Version: {}\n".format(self.PROGRAM_VERSION) myHFile.write(lines) myCFile.write(lines) if self.dateString: lines = "// Date: {}\n//\n".format(self.dateString) myHFile.write(lines) myCFile.write(lines) myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLTree), self._treeTag)) myHFile.write("\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName)) myCFile.write('#include "{}"\n\n'.format( os.path.basename(hFilepath))) CHAR = "const unsigned char" BYTE = "const int" dictInfo = { "referenceNumberDict": ("referenceNumber (integer 1..255)", "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} sourceComponent[3+1];" .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "sourceComponentDict": ("sourceComponent", "{} sourceComponent[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks;" .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "sequenceList": ("sequenceList", ), "CCELDict": ("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} sourceComponent[3+1];" .format(CHAR, BYTE, CHAR)), "initialAllAbbreviationsDict": ("abbreviation", "{}* abbreviation; {} sourceComponent[3+1];".format( CHAR, CHAR)) } for dictName, dictData in self.__DataList.items(): exportPythonDict(myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1]) myHFile.write("#endif // {}\n\n".format(ifdefName)) myHFile.write("// end of {}".format(os.path.basename(hFilepath))) myCFile.write("// end of {}".format(os.path.basename(cFilepath)))
class BibleOrganizationalSystemsConverter: """ Class for handling and converting BibleOrganizationalSystems. """ def __init__( self ): """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = "BibleOrganizationalSystems" # These fields are used for parsing the XML self._treeTag = "BibleOrganizationalSystems" self._headerTag = "header" self._mainElementTag = "BibleOrganizationalSystem" # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = ( "type", ) self._optionalAttributes = () self._uniqueAttributes = () self._compulsoryElements = ( "referenceAbbreviation", "languageCode", ) self._optionalElements = ( "name", "completionDate", "publicationDate", "copyright", "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem", "translator", "publisher", "derivedFrom", "usesText", "includesBooks", "url", "comment", ) self._uniqueElements = () self._allowedMultiple = ( "name", "translator", "derivedFrom", "usesText", "url", "comment", ) # These are fields that we will fill later self.title, self.version, self.date = None, None, None self.header, self._XMLtree = None, None self.__dataDicts = None # Get the data tables that we need for proper checking self._ISOLanguages = ISO_639_3_Languages().loadData() self._BibleBookOrderSystems = BibleBookOrderSystems().loadData() self._BiblePunctuationSystems = BiblePunctuationSystems().loadData() self._BibleVersificationSystems = BibleVersificationSystems().loadData() self._BibleBooksNamesSystems = BibleBooksNamesSystems().loadData() # end of BibleOrganizationalSystemsConverter.__init__ def __str__( self ): """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ result = "" if self.title: result += ('\n' if result else '') + self.title if self.version: result += ('\n' if result else '') + " Version: {}".format( self.version ) if self.date: result += ('\n' if result else '') + " Date: {}".format( self.date ) result += ('\n' if result else '') + " Number of entries = {}".format( len(self._XMLtree) ) return result # end of BibleOrganizationalSystemsConverter.__str__ def __len__( self ): """ Returns the number of items loaded. """ return len( self._XMLtree ) # end of BibleOrganizationalSystemsConverter.__len__ def loadAndValidate( self, XMLFilepath=None ): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd self._load( XMLFilepath ) if BibleOrgSysGlobals.strictCheckingFlag: self._validate() return self # end of BibleOrganizationalSystemsConverter.loadAndValidate def _load( self, XMLFilepath ): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert( XMLFilepath ) self.__XMLFilepath = XMLFilepath assert( self._XMLtree is None or len(self._XMLtree)==0 ) # Make sure we're not doing this twice if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading BibleOrganisationalSystems XML file from {!r}...").format( self.__XMLFilepath ) ) self._XMLtree = ElementTree().parse( self.__XMLFilepath ) assert( self._XMLtree ) # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.header = header self._XMLtree.remove( header ) if len(header)>1: logging.info( _("Unexpected elements in header") ) elif len(header)==0: logging.info( _("Missing work element in header") ) else: work = header[0] if work.tag == "work": self.version = work.find("version").text self.date = work.find("date").text self.title = work.find("title").text else: logging.warning( _("Missing work element in header") ) else: logging.warning( _("Missing header element (looking for {!r} tag)").format( self._headerTag ) ) else: logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) ) # end of BibleOrganizationalSystemsConverter._load def _validate( self ): """ Check/validate the loaded data. """ assert( self._XMLtree ) uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = [] expectedID = 1 for j,element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get( attributeName ) if attributeValue is None: logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) ) if not attributeValue: logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get( attributeName ) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) ) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_"+attributeName]: logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) ) uniqueDict["Attribute_"+attributeName].append( attributeValue ) ID = element.find("referenceAbbreviation").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find( elementName ) is None: logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})").format( elementName, ID, j ) ) elif not element.find( elementName ).text: logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) ) # Check optional elements for elementName in self._optionalElements: if element.find( elementName ) is not None: if not element.find( elementName ).text: logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) ) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})").format( subelement.tag, subelement.text, ID, j ) ) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find( elementName ) is not None: text = element.find( elementName ).text if text in uniqueDict["Element_"+elementName]: logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})").format( text, elementName, ID, j ) ) uniqueDict["Element_"+elementName].append( text ) # Special checks of particular fields if element.find("includesBooks") is not None: bookList = element.find("includesBooks").text.split() for BBB in bookList: if not BibleOrgSysGlobals.BibleBooksCodes.isValidReferenceAbbreviation( BBB ): logging.critical( _("Unrecognized {!r} Bible book code found in 'includesBooks' in record with ID {!r} (record {})").format( BBB, ID, j) ) if bookList.count( BBB ) > 1: logging.error( _("Multiple {!r} Bible book codes found in 'includesBooks' in record with ID {!r} (record {})").format( BBB, ID, j) ) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) ) # end of BibleOrganizationalSystemsConverter._validate def importDataToPython( self ): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) """ assert( self._XMLtree ) if self.__dataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__dataDicts # We'll create a number of dictionaries with different elements as the key dataDict, indexDict, combinedIndexDict = {}, {}, {} for element in self._XMLtree: bits = {} # Get the required information out of the tree for this element # Start with the compulsory elements and type attribute referenceAbbreviation = element.find('referenceAbbreviation').text bits['referenceAbbreviation'] = referenceAbbreviation myType = element.get( 'type' ) bits['type'] = myType if myType not in allowedTypes: logging.error( _("Unrecognized {!r} type for {!r} (expected one of {})").format(myType,referenceAbbreviation,allowedTypes) ) languageCode = element.find('languageCode').text if self._ISOLanguages and not self._ISOLanguages.isValidLanguageCode( languageCode ): # Check that we have a valid language code if languageCode != '???': logging.error( "Unrecognized {!r} ISO-639-3 language code in {!r} organisational system".format( languageCode, referenceAbbreviation ) ) bits['languageCode'] = languageCode # Now work on the optional elements for name in ( 'name', 'publicationDate', 'versificationSystem', 'punctuationSystem', 'bookOrderSystem', 'booksNamesSystem', 'derivedFrom', 'usesText', 'includesBooks' ): for nameData in element.findall(name): if name in self._allowedMultiple: # Put multiple entries into a list if name not in bits: bits[name] = [nameData.text] else: bits[name].append( nameData.text ) else: # Not allowed multiples if name in bits: logging.error( _("Unexpected multiple {} elements found in {} {}").format(name, referenceAbbreviation, myType) ) if name=='includesBooks': # special handling bits['includesBooks'] = nameData.text.split() for BBB in bits['includesBooks']: if not BibleOrgSysGlobals.BibleBooksCodes.isValidReferenceAbbreviation( BBB ): logging.error( _("Unrecognized {!r} Bible book code found in 'includesBooks' in {} {}").format( BBB, referenceAbbreviation, myType) ) else: bits[name] = nameData.text # normal handling extension = '_' + myType extendedRA = referenceAbbreviation if referenceAbbreviation.endswith(extension) else (referenceAbbreviation + extension) dataDict[extendedRA] = bits if referenceAbbreviation in indexDict: indexDict[referenceAbbreviation].append( extendedRA ) else: indexDict[referenceAbbreviation] = [extendedRA] if referenceAbbreviation in combinedIndexDict: combinedIndexDict[referenceAbbreviation].append( extendedRA ) else: combinedIndexDict[referenceAbbreviation] = [extendedRA] if extendedRA != referenceAbbreviation: #assert( extendedRA not in combinedIndexDict ) if extendedRA in combinedIndexDict: logging.error( _("Found {} in combinedIndexDict").format( extendedRA ) ) combinedIndexDict[extendedRA] = [extendedRA] assert( len(indexDict) <= len(dataDict) ) assert( len(combinedIndexDict) >= len(indexDict) ) if BibleOrgSysGlobals.strictCheckingFlag: # We'll do quite a bit more cross-checking now for extendedReferenceAbbreviation,data in dataDict.items(): #print( extendedReferenceAbbreviation, data ) systemType = data['type'] if systemType=='edition': if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' {!r}").format( extendedReferenceAbbreviation, data['derivedFrom'] ) ) if 'usesText' not in data: logging.error( _("{} doesn't specify 'usesText'").format( extendedReferenceAbbreviation ) ) else: # have a 'usesText' list for textAbbrev in data['usesText']: if textAbbrev not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'usesText' field").format(extendedReferenceAbbreviation,textAbbrev) ) elif len(indexDict[textAbbrev]) > 1: # it could be ambiguous found = 0 for thisType in ('revision','translation','original'): # but not 'edition' usesTextExtended = textAbbrev + '_' + thisType if usesTextExtended in dataDict: foundOne = usesTextExtended found += 1 assert( found > 0 ) if found==1: # ah, it's not actually ambiguous if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Adjusted text used for {} from the ambiguous {!r} to the extended name {!r}").format( extendedReferenceAbbreviation, textAbbrev, foundOne ) ) data['usesText'].remove( textAbbrev) data['usesText'].append( foundOne ) else: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'usesText' field").format(extendedReferenceAbbreviation,textAbbrev,indexDict[textAbbrev]) ) elif systemType=='revision': if 'derivedFrom' not in data: logging.error( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation ) ) else: for df in data['derivedFrom']: if df not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'derivedFrom' field").format(extendedReferenceAbbreviation,df) ) elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'derivedFrom' field").format(extendedReferenceAbbreviation,df,indexDict[df]) ) elif systemType=='translation': if 'derivedFrom' not in data: logging.warning( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation ) ) else: for df in data['derivedFrom']: if df not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'derivedFrom' field").format(extendedReferenceAbbreviation,df) ) elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'derivedFrom' field").format(extendedReferenceAbbreviation,df,indexDict[df]) ) elif systemType=='original': if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' {!r}").format( extendedReferenceAbbreviation, data['derivedFrom'] ) ) if 'versificationSystem' in data and data['versificationSystem'] not in ('None', 'Unknown'): if not self._BibleVersificationSystems.isValidVersificationSystemName( data['versificationSystem'] ): extra = "\n Available systems are {}".format( self._BibleVersificationSystems.getAvailableVersificationSystemNames()) if BibleOrgSysGlobals.verbosityLevel > 2 else '' logging.error( _("Unknown {!r} versification system name in {}{}").format(data['versificationSystem'],extendedReferenceAbbreviation,extra) ) if 'punctuationSystem' in data and data['punctuationSystem'] not in ('None', 'Unknown'): if not self._BiblePunctuationSystems.isValidPunctuationSystemName( data['punctuationSystem'] ): extra = "\n Available systems are {}".format( self._BiblePunctuationSystems.getAvailablePunctuationSystemNames()) if BibleOrgSysGlobals.verbosityLevel > 2 else '' logging.error( _("Unknown {!r} punctuation system name in {}{}").format(data['punctuationSystem'],extendedReferenceAbbreviation,extra) ) self.__dataDicts = dataDict, indexDict, combinedIndexDict return self.__dataDicts # end of importDataToPython def pickle( self, filepath=None ): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables.pickle" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) with open( filepath, 'wb' ) as myFile: pickle.dump( self.__dataDicts, myFile ) # end of pickle def exportDataToPython( self, filepath=None ): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ): """Exports theDict to theFile.""" theFile.write( "{} = {{\n # Key is {}\n # Fields are: {}\n".format( dictName, keyComment, fieldsComment ) ) for dictKey in sorted(theDict.keys()): theFile.write( ' {}: {},\n'.format( repr(dictKey), theDict[dictKey] ) ) theFile.write( "}}\n# end of {}\n\n".format( dictName ) ) # end of exportPythonDict assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.py" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) dataDict, indexDict, combinedIndexDict = self.importDataToPython() with open( filepath, 'wt' ) as myFile: myFile.write( "# {}\n#\n".format( filepath ) ) myFile.write( "# This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) ) if self.title: myFile.write( "# {}\n".format( self.title ) ) if self.version: myFile.write( "# Version: {}\n".format( self.version ) ) if self.date: myFile.write( "# Date: {}\n#\n".format( self.date ) ) myFile.write( "# {} {} entries loaded from the original XML file.\n".format( len(self._XMLtree), self._treeTag ) ) #myFile.write( "# {} {} loaded from the original XML files.\n#\n\n".format( len(self.systems), self._treeTag ) ) exportPythonDict( myFile, dataDict, "dataDict", "extendedReferenceAbbreviation", "referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, indexDict, "indexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, combinedIndexDict, "combinedIndexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) # end of exportDataToPython def exportDataToJSON( self, filepath=None ): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.json" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) with open( filepath, 'wt' ) as myFile: #myFile.write( "# {}\n#\n".format( filepath ) ) # Not sure yet if these comment fields are allowed in JSON #myFile.write( "# This UTF-8 file was automatically generated by BibleBooksCodes.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) ) #if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) ) #if self.ProgVersion: myFile.write( "# Version: {}\n".format( self.ProgVersion ) ) #if self.dateString: myFile.write( "# Date: {}\n#\n".format( self.dateString ) ) #myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) ) json.dump( self.__dataDicts, myFile, indent=2 ) #myFile.write( "\n\n# end of {}".format( os.path.basename(filepath) ) ) # end of exportDataToJSON def exportDataToC( self, filepath=None ): """ Writes the information tables to a .h file that can be included in c and c++ programs. """ raise Exception( "C export not written yet" ) def exportPythonDict( theFile, theDict, dictName, structName, fieldsComment ): """Exports theDict to theFile.""" def convertEntry( entry ): """Convert special characters in an entry...""" result = "" for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) ) return result theFile.write( "static struct {} {}[] = {\n // Fields are {}\n".format( structName, dictName, fieldsComment ) ) for entry in sorted(theDict.keys()): if isinstance( entry, str ): theFile.write( " {\"{}\", {}},\n".format( entry, convertEntry(theDict[entry]) ) ) elif isinstance( entry, int ): theFile.write( " {{}, {}},\n".format( entry, convertEntry(theDict[entry]) ) ) else: logging.error( _("Can't handle this type of data yet: {}").format( entry ) ) theFile.write( "}; // {}\n\n".format( dictName) ) # end of exportPythonDict assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.h" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) IDDict, RADict, SBLDict, OADict, PADict, PNDict = self.importDataToPython() ifdefName = self._filenameBase.upper() + "_Tables_h" with open( filepath, 'wt' ) as myFile: myFile.write( "// {}\n//\n".format( filepath ) ) myFile.write( "// This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n//\n".format( ProgVersion, datetime.now() ) ) if self.title: myFile.write( "// {}\n".format( self.title ) ) if self.version: myFile.write( "// Version: {}\n".format( self.version ) ) if self.date: myFile.write( "// Date: {}\n//\n".format( self.date ) ) myFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) ) myFile.write( "#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) ) exportPythonDict( myFile, IDDict, "IDDict", "{int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "id (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, RADict, "RADict", "{char* refAbbrev; int id; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "referenceAbbreviation (sorted), SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, SBLDict, "SBLDict", "{char* SBLAbbrev; int id; char* refAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "SBLAbbreviation (sorted), ReferenceAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, OADict, "OADict", "{char* OSISAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "OSISAbbreviation (sorted), ReferenceAbbreviation, SBLAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PADict, "PADict", "{char* PTAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTNum; char* EngName;}", "ParatextAbbreviation (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PNDict, "PNDict", "{char* PTNum; int id; char* PTAbbrev; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* EngName;}", "ParatextNumberString (sorted), ParatextAbbreviation, referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, id, nameEnglish (comment only)" ) myFile.write( "#endif // {}\n".format( ifdefName ) )
class ZefaniaXMLBible(Bible): """ Class for reading, validating, and converting ZefaniaXMLBible XML. """ XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}" treeTag = 'XMLBIBLE' infoTag = 'INFORMATION' bookTag = 'BIBLEBOOK' chapterTag = 'CHAPTER' captionTag = 'CAPTION' verseTag = 'VERS' noteTag = 'NOTE' styleTag = 'STYLE' breakTag = 'BR' def __init__(self, sourceFolder, givenName, encoding='utf-8'): """ Constructor: just sets up the Zefania Bible object. """ # Setup and initialise the base class first Bible.__init__(self) self.objectNameString = "Zefania XML Bible object" self.objectTypeString = "Zefania" # Now we can set our object variables self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName) self.tree = self.header = None # Will hold the XML data # Get the data tables that we need for proper checking #self.ISOLanguages = ISO_639_3_Languages().loadData() self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG") # Do a preliminary check on the readability of our file if not os.access(self.sourceFilepath, os.R_OK): print("ZefaniaXMLBible: File '{}' is unreadable".format( self.sourceFilepath)) self.name = self.givenName #if self.name is None: #pass # end of ZefaniaXMLBible.__init__ def load(self): """ Load a single source XML file and load book elements. """ if Globals.verbosityLevel > 2: print(_("Loading {}...").format(self.sourceFilepath)) self.tree = ElementTree().parse(self.sourceFilepath) if Globals.debugFlag: assert (len(self.tree) ) # Fail here if we didn't load anything at all # Find the main (bible) container if self.tree.tag == ZefaniaXMLBible.treeTag: location = "Zefania XML file" Globals.checkXMLNoText(self.tree, location, '4f6h') Globals.checkXMLNoTail(self.tree, location, '1wk8') schema = None name = status = BibleType = revision = version = lgid = None for attrib, value in self.tree.items(): if attrib == ZefaniaXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation': schema = value elif attrib == "biblename": name = value elif attrib == "lgid": lgid = value # In italian.xml this is set to "german" elif attrib == "status": status = value elif attrib == "type": BibleType = value elif attrib == "revision": revision = value elif attrib == "version": version = value else: logging.warning( "Unprocessed '{}' attribute ({}) in main element". format(attrib, value)) if name: self.name = name if status: self.status = status if revision: self.revision = revision if version: self.version = version if self.tree[0].tag == 'INFORMATION': self.header = self.tree[0] self.tree.remove(self.header) self.__validateAndExtractHeader() else: # Handle information records at the END of the file ix = len(self.tree) - 1 if self.tree[ix].tag == 'INFORMATION': self.header = self.tree[ix] self.tree.remove(self.header) self.__validateAndExtractHeader() # Find the submain (book) containers for element in self.tree: if element.tag == ZefaniaXMLBible.bookTag: sublocation = "book in " + location Globals.checkXMLNoText(element, sublocation, 'g3g5') Globals.checkXMLNoTail(element, sublocation, 'd3f6') self.__validateAndExtractBook(element) else: logging.error("Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.bookTag, element.tag)) else: logging.error("Expected to load '{}' but got '{}'".format( ZefaniaXMLBible.treeTag, self.tree.tag)) self.doPostLoadProcessing() # end of ZefaniaXMLBible.load def __validateAndExtractHeader(self): """ Extracts information out of the header record, such as: <INFORMATION> <title>King James Version</title> <creator></creator> <subject>The Holy Bible</subject> <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description> <publisher>FREE BIBLE SOFTWARE GROUP</publisher> <contributors /> <date>2009-01-23</date> <type>Bible</type> <format>Zefania XML Bible Markup Language</format> <identifier>kjv</identifier> <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source> <language>ENG</language> <coverage>provide the Bible to the nations of the world</coverage> <rights>We believe that this Bible is found in the Public Domain.</rights> </INFORMATION> """ if Globals.debugFlag: assert (self.header) location = 'Header' Globals.checkXMLNoAttributes(self.header, location, 'j4j6') Globals.checkXMLNoText(self.header, location, 'sk4l') Globals.checkXMLNoTail(self.header, location, 'a2d4') # TODO: We probably need to rationalise some of the self.xxx stores for element in self.header: #print( "header", element.tag ) if element.tag == 'title': sublocation = "title in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.title = element.text elif element.tag == 'creator': sublocation = "creator in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.creator = element.text elif element.tag == 'subject': sublocation = "subject in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.subject = element.text elif element.tag == 'description': sublocation = "description in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.description = element.text elif element.tag == 'publisher': sublocation = "publisher in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.publisher = element.text elif element.tag == 'contributors': sublocation = "contributors in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.contributors = element.text elif element.tag == 'date': sublocation = "date in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.date = element.text elif element.tag == 'type': sublocation = "type in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.documentType = element.text elif element.tag == 'format': sublocation = "format in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) if Globals.debugFlag: assert ( element.text == 'Zefania XML Bible Markup Language') elif element.tag == 'identifier': sublocation = "identifier in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.identifier = element.text elif element.tag == 'source': sublocation = "source in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.source = element.text elif element.tag == 'language': sublocation = "language in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if Globals.debugFlag: assert (element.text) self.language = element.text elif element.tag == 'coverage': sublocation = "coverage in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.coverage = element.text elif element.tag == 'rights': sublocation = "rights in {}".format(location) Globals.checkXMLNoTail(element, sublocation, 'al1d') Globals.checkXMLNoAttributes(element, sublocation, 'j3jd') Globals.checkXMLNoSubelements(element, sublocation, '5g78') if element.text: self.rights = element.text else: logging.error("Found unexpected '{}' tag in {}".format( element.tag, location)) # end of ZefaniaXMLBible.__validateAndExtractHeader def __validateAndExtractBook(self, book): """ Check/validate and extract book data from the given XML book record finding chapter subelements. """ if Globals.verbosityLevel > 3: print(_("Validating XML book...")) # Process the div attributes first BBB = bookName = bookShortName = bookNumber = None for attrib, value in book.items(): if attrib == "bnumber": bookNumber = value elif attrib == "bname": bookName = value elif attrib == "bsname": bookShortName = value else: logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value)) if bookNumber: try: BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber) except KeyError: logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \ .format( bookNumber, bookName, bookShortName ) ) elif bookName: BBB = self.genericBOS.getBBB(bookName) if BBB: if Globals.verbosityLevel > 2: print(_("Validating {} {}...").format(BBB, bookName)) thisBook = BibleBook(self.name, BBB) thisBook.objectNameString = "Zefania XML Bible Book object" thisBook.objectTypeString = "Zefania" #thisBook.sourceFilepath = self.sourceFilepath for element in book: if element.tag == ZefaniaXMLBible.chapterTag: sublocation = "chapter in {}".format(BBB) Globals.checkXMLNoText(element, sublocation, 'j3jd') Globals.checkXMLNoTail(element, sublocation, 'al1d') self.__validateAndExtractChapter(BBB, thisBook, element) else: logging.error("Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.chapterTag, element.tag)) if Globals.verbosityLevel > 2: print(" Saving {} into results...".format(BBB)) self.saveBook(thisBook) # end of ZefaniaXMLBible.__validateAndExtractBook def __validateAndExtractChapter(self, BBB, thisBook, chapter): """ Check/validate and extract chapter data from the given XML book record finding and saving chapter numbers and finding and saving verse elements. """ if Globals.verbosityLevel > 3: print(_("Validating XML chapter...")) # Process the chapter attributes first chapterNumber = numVerses = None for attrib, value in chapter.items(): if attrib == "cnumber": chapterNumber = value else: logging.warning( "Unprocessed '{}' attribute ({}) in chapter element". format(attrib, value)) if chapterNumber: #print( BBB, 'c', chapterNumber ) thisBook.appendLine('c', chapterNumber) else: logging.error( "Missing 'n' attribute in chapter element for BBB".format(BBB)) for element in chapter: if element.tag == ZefaniaXMLBible.verseTag: location = "verse in {} {}".format(BBB, chapterNumber) self.__validateAndExtractVerse(BBB, chapterNumber, thisBook, element) elif element.tag == ZefaniaXMLBible.captionTag: # Used in Psalms location = "caption in {} {}".format(BBB, chapterNumber) Globals.checkXMLNoTail(element, location, 'k5k8') Globals.checkXMLNoSubelements(element, location, 'd3f5') # Handle caption attributes vRef = None for attrib, value in element.items(): if attrib == "vref": vRef = value if Globals.debugFlag: assert (vRef == '1') else: logging.warning( "Unprocessed '{}' attribute ({}) in caption element" .format(attrib, value)) if Globals.debugFlag: assert (vRef) vText = element.text if not vText: logging.warning("{} {}:{} has no text".format( BBB, chapterNumber, vRef)) if vText: # This is the main text of the caption #print( "{} {}:{} '{}'".format( BBB, chapterNumber, verseNumber, vText ) ) thisBook.appendLine('v', '0' + ' ' + vText) # We save it as verse zero else: logging.error("Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.verseTag, element.tag)) # end of ZefaniaXMLBible.__validateAndExtractChapter def __validateAndExtractVerse(self, BBB, chapterNumber, thisBook, verse): """ Check/validate and extract chapter data from the given XML book record finding and saving chapter numbers and finding and saving verse elements. """ if Globals.verbosityLevel > 3: print(_("Validating XML verse...")) location = "verse in {} {}".format(BBB, chapterNumber) Globals.checkXMLNoTail(verse, location, 'l5ks') # Handle verse attributes verseNumber = toVerseNumber = None for attrib, value in verse.items(): if attrib == "vnumber": verseNumber = value else: logging.warning( "Unprocessed '{}' attribute ({}) in verse element".format( attrib, value)) if Globals.debugFlag: assert (verseNumber) location = "{}:{}".format( location, verseNumber) # Get a better location description #thisBook.appendLine( 'v', verseNumber ) vText = verse.text if vText: vText = vText.strip() #if not vText: # This happens if a verse starts immediately with a style or note #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) ) # Handle verse subelements (notes and styled portions) for subelement in verse: if subelement.tag == ZefaniaXMLBible.noteTag: sublocation = "note in " + location noteType = None for attrib, value in subelement.items(): if attrib == "type": noteType = value else: logging.warning( "Unprocessed '{}' attribute ({}) in style subelement" .format(attrib, value)) if noteType not in ( 'n-studynote', 'x-studynote', ): logging.warning("Unexpected {} note type in {}".format( noteType, BBB)) if Globals.debugFlag: assert (noteType) nText, nTail = subelement.text, subelement.tail #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) ) #thisBook.appendLine( 'ST', css ) # XXXXXXXXXXXXXXXXXXXXXXXXXX Losing data here (for now) #thisBook.appendLine( 'ST=', nText ) if nTail: if '\n' in nTail: print( "ZefaniaXMLBible.__validateAndExtractVerse: nTail {} {}:{} '{}'" .format(BBB, chapterNumber, verseNumber, nTail)) nTail = nTail.replace('\n', ' ') thisBook.appendLine('v~', nTail) for subsubelement in subelement: if subsubelement.tag == ZefaniaXMLBible.styleTag: subsublocation = "style in " + sublocation Globals.checkXMLNoSubelements(subsubelement, subsublocation, 'fyt4') css = idStyle = None for attrib, value in subsubelement.items(): if attrib == "css": css = value elif attrib == "id": idStyle = value else: logging.warning( "Unprocessed '{}' attribute ({}) in style subsubelement" .format(attrib, value)) if Globals.debugFlag: assert (css or idStyle) SFM = None if css == "font-style:italic": SFM = '\\it' elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' elif css == "color:#FF0000": SFM = '\\em' elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' elif css is None and idStyle == 'cl:divineName': SFM = '\\nd' else: print("css is", css, "idStyle is", idStyle) halt sText, sTail = subsubelement.text.strip( ), subsubelement.tail if Globals.debugFlag: assert (sText) if SFM: vText += SFM + ' ' + sText + SFM + '*' else: vText += '\\sc ' + '[' + css + ']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() else: logging.error( "Expected to find {} but got '{}' in {}".format( ZefaniaXMLBible.styleTag, subsubelement.tag, sublocation)) elif subelement.tag == ZefaniaXMLBible.styleTag: sublocation = "style in " + location Globals.checkXMLNoSubelements(subelement, sublocation, 'f5gh') css = idStyle = None for attrib, value in subelement.items(): if attrib == "css": css = value elif attrib == "id": idStyle = value else: logging.warning( "Unprocessed '{}' attribute ({}) in style subelement" .format(attrib, value)) if Globals.debugFlag: assert (css or idStyle) SFM = None if css == "font-style:italic": SFM = '\\it' elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' elif css == "color:#FF0000": SFM = '\\em' elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' elif css is None and idStyle == 'cl:divineName': SFM = '\\nd' else: print("css is", css, "idStyle is", idStyle) halt sText, sTail = subelement.text.strip(), subelement.tail if Globals.debugFlag: assert (sText) if SFM: vText += SFM + ' ' + sText + SFM + '*' else: vText += '\\sc ' + '[' + css + ']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() elif subelement.tag == ZefaniaXMLBible.breakTag: sublocation = "line break in " + location Globals.checkXMLNoText(subelement, sublocation, 'c1d4') Globals.checkXMLNoSubelements(subelement, sublocation, 'g4g8') art = None for attrib, value in subelement.items(): if attrib == "art": art = value else: logging.warning( "Unprocessed '{}' attribute ({}) in style subelement" .format(attrib, value)) if Globals.debugFlag: assert (art == 'x-nl') #print( BBB, chapterNumber, verseNumber ) #assert( vText ) if vText: thisBook.appendLine('v', verseNumber + ' ' + vText) vText = '' thisBook.appendLine( 'm', subelement.tail.strip() if subelement.tail else '') #bTail = subelement.tail #if bTail: vText = bTail.strip() else: logging.error( "Expected to find NOTE or STYLE but got '{}' in {}".format( subelement.tag, location)) if vText: # This is the main text of the verse (follows the verse milestone) if '\n' in vText: print( "ZefaniaXMLBible.__validateAndExtractVerse: vText {} {}:{} '{}'" .format(BBB, chapterNumber, verseNumber, vText)) vText = vText.replace('\n', ' ') thisBook.appendLine('v', verseNumber + ' ' + vText)
class USFMMarkersConverter: """ Class for reading, validating, and converting USFMMarkers. This is only intended as a transitory class (used at start-up). The USFMMarkers class has functions more generally useful. """ def __init__( self): # We can't give this parameters because of the singleton """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = "USFMMarkers" # These fields are used for parsing the XML self._treeTag = "USFMMarkers" self._headerTag = "header" self._mainElementTag = "USFMMarker" # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = () self._optionalAttributes = () self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes self._compulsoryElements = ( "nameEnglish", "marker", "compulsory", "level", "numberable", "nests", "hasContent", "printed", "closed", "occursIn", "deprecated", ) self._optionalElements = ("description", ) #self._uniqueElements = self._compulsoryElements + self.optionalElements self._uniqueElements = ( "nameEnglish", "marker", ) # These are fields that we will fill later self._XMLheader, self._XMLtree = None, None self.__DataDicts = {} # Used for import self.titleString = self.ProgVersion = self.dateString = '' # end of __init__ def loadAndValidate(self, XMLFilepath=None): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml") # Relative to module, not cwd self.__load(XMLFilepath) if Globals.strictCheckingFlag: self.__validate() else: # The data must have been already loaded if XMLFilepath is not None and XMLFilepath != self.__XMLFilepath: logging.error( _("Bible books codes are already loaded -- your different filepath of '{}' was ignored" ).format(XMLFilepath)) return self # end of loadAndValidate def __load(self, XMLFilepath): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert (XMLFilepath) self.__XMLFilepath = XMLFilepath assert (self._XMLtree is None or len(self._XMLtree) == 0 ) # Make sure we're not doing this twice if Globals.verbosityLevel > 2: print( _("Loading USFMMarkers XML file from '{}'...").format( self.__XMLFilepath)) self._XMLtree = ElementTree().parse(self.__XMLFilepath) assert (self._XMLtree) # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.XMLheader = header self._XMLtree.remove(header) Globals.checkXMLNoText(header, "header") Globals.checkXMLNoTail(header, "header") Globals.checkXMLNoAttributes(header, "header") if len(header) > 1: logging.info(_("Unexpected elements in header")) elif len(header) == 0: logging.info(_("Missing work element in header")) else: work = header[0] Globals.checkXMLNoText(work, "work in header") Globals.checkXMLNoTail(work, "work in header") Globals.checkXMLNoAttributes(work, "work in header") if work.tag == "work": self.ProgVersion = work.find("version").text self.dateString = work.find("date").text self.titleString = work.find("title").text else: logging.warning(_("Missing work element in header")) else: logging.warning( _("Missing header element (looking for '{}' tag)".format( self._headerTag))) if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected '{}' tail data after header").format( element.tail)) else: logging.error( _("Expected to load '{}' but got '{}'").format( self._treeTag, self._XMLtree.tag)) # end of __load def __validate(self): """ Check/validate the loaded data. """ assert (self._XMLtree) uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_" + elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_" + attributeName] = [] expectedID = 1 for j, element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: Globals.checkXMLNoText(element, element.tag) Globals.checkXMLNoTail(element, element.tag) if not self._compulsoryAttributes and not self._optionalAttributes: Globals.checkXMLNoAttributes(element, element.tag) if not self._compulsoryElements and not self._optionalElements: Globals.checkXMLNoSubelements(element, element.tag) # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get(attributeName) if attributeValue is None: logging.error( _("Compulsory '{}' attribute is missing from {} element in record {}" ).format(attributeName, element.tag, j)) if not attributeValue: logging.warning( _("Compulsory '{}' attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional '{}' attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get(attributeName) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional '{}' attribute ('{}') found on {} element in record {}" ).format(attributeName, attributeValue, element.tag, j)) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_" + attributeName]: logging.error( _("Found '{}' data repeated in '{}' field on {} element in record {}" ).format(attributeValue, attributeName, element.tag, j)) uniqueDict["Attribute_" + attributeName].append(attributeValue) # Get the marker to use as a record ID marker = element.find("marker").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find(elementName) is None: logging.error( _("Compulsory '{}' element is missing in record with marker '{}' (record {})" ).format(elementName, marker, j)) elif not element.find(elementName).text: logging.warning( _("Compulsory '{}' element is blank in record with marker '{}' (record {})" ).format(elementName, marker, j)) # Check optional elements for elementName in self._optionalElements: if element.find(elementName) is not None: if not element.find(elementName).text: logging.warning( _("Optional '{}' element is blank in record with marker '{}' (record {})" ).format(elementName, marker, j)) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional '{}' element ('{}') found in record with marker '{}' (record {})" ).format(subelement.tag, subelement.text, marker, j)) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find(elementName) is not None: text = element.find(elementName).text if text in uniqueDict["Element_" + elementName]: logging.error( _("Found '{}' data repeated in '{}' element in record with marker '{}' (record {})" ).format(text, elementName, marker, j)) uniqueDict["Element_" + elementName].append(text) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j)) if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected '{}' tail data after {} element in record {}" ).format(element.tail, element.tag, j)) if self._XMLtree.tail is not None and self._XMLtree.tail.strip(): logging.error( _("Unexpected '{}' tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag)) # end of __validate def __str__(self): """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ indent = 2 result = "USFMMarkersConverter object" if self.titleString: result += ('\n' if result else '') + ' ' * indent + _("Title: {}").format( self.titleString) if self.ProgVersion: result += ('\n' if result else '') + ' ' * indent + _("Version: {}").format( self.ProgVersion) if self.dateString: result += ('\n' if result else '' ) + ' ' * indent + _("Date: {}").format(self.dateString) if self._XMLtree is not None: result += ('\n' if result else '') + ' ' * indent + _("Number of entries = {}").format( len(self._XMLtree)) return result # end of __str__ def __len__(self): """ Returns the number of SFM markers loaded. """ return len(self._XMLtree) # end of __len__ def importDataToPython(self): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) """ assert (self._XMLtree) if self.__DataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__DataDicts # Load and validate entries and create the dictionaries and lists # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, ... rawMarkerDict, numberedMarkerList, combinedMarkerDict, = OrderedDict( ), [], {} conversionDict, backConversionDict = {}, {} newlineMarkersList, numberedNewlineMarkersList, combinedNewlineMarkersList = [], [], [] internalMarkersList, numberedInternalMarkersList, combinedInternalMarkersList = [], [], [] noteMarkersList, deprecatedMarkersList = [], [] for element in self._XMLtree: # Get the required information out of the tree for this element # Start with the compulsory elements nameEnglish = element.find( "nameEnglish" ).text # This name is really just a comment element marker = element.find("marker").text if marker.lower() != marker: logging.error( _("Marker '{}' should be lower case").format(marker)) compulsory = element.find("compulsory").text if compulsory not in ("Yes", "No"): logging.error( _("Unexpected '{}' compulsory field for marker '{}'"). format(compulsory, marker)) level = element.find("level").text compulsoryFlag = compulsory == "Yes" if level == "Newline": newlineMarkersList.append(marker) combinedNewlineMarkersList.append(marker) elif level == "Internal": internalMarkersList.append(marker) elif level == "Note": noteMarkersList.append(marker) else: logging.error( _("Unexpected '{}' level field for marker '{}'").format( level, marker)) numberable = element.find("numberable").text if numberable not in ("Yes", "No"): logging.error( _("Unexpected '{}' numberable field for marker '{}'"). format(numberable, marker)) numberableFlag = numberable == "Yes" if numberableFlag and level == "Character": logging.error( _("Unexpected '{}' numberable field for character marker '{}'" ).format(numberable, marker)) nests = element.find("nests").text if nests not in ("Yes", "No"): logging.error( _("Unexpected '{}' nests field for marker '{}'").format( nests, marker)) nestsFlag = nests == "Yes" hasContent = element.find("hasContent").text if hasContent not in ("Always", "Never", "Sometimes"): logging.error( _("Unexpected '{}' hasContent field for marker '{}'"). format(hasContent, marker)) printed = element.find("printed").text if printed not in ("Yes", "No"): logging.error( _("Unexpected '{}' printed field for marker '{}'").format( printed, marker)) printedFlag = printed == "Yes" closed = element.find("closed").text if closed not in ("No", "Always", "Optional"): logging.error( _("Unexpected '{}' closed field for marker '{}'").format( closed, marker)) occursIn = element.find("occursIn").text if occursIn not in ("Header", "Introduction", "Numbering", "Text", "Canonical Text", "Poetry", "Text, Poetry", "Acrostic verse", "Table row", "Footnote", "Cross-reference", "Front and back matter"): logging.error( _("Unexpected '{}' occursIn field for marker '{}'").format( occursIn, marker)) deprecated = element.find("deprecated").text if deprecated not in ("Yes", "No"): logging.error( _("Unexpected '{}' deprecated field for marker '{}'"). format(deprecated, marker)) deprecatedFlag = deprecated == "Yes" # The optional elements are set to None if they don't exist #closed = None if element.find("closed") is None else element.find("closed").text #if closed is not None and closed not in ( "No", "Always", "Optional" ): logging.error( _("Unexpected '{}' closed field for marker '{}'").format( closed, marker ) ) #if level=="Character" and closed is None: logging.error( _("Entry for character marker '{}' doesn't have a \"closed\" field").format( marker ) ) description = None if element.find( "description") is None else element.find("description").text if description is not None: assert (description) # Now put it into my dictionaries and lists for easy access # The marker is lowercase by definition if "marker" in self._uniqueElements: assert (marker not in rawMarkerDict) # Shouldn't be any duplicates rawMarkerDict[marker] = { "compulsoryFlag": compulsoryFlag, "level": level, "numberableFlag": numberableFlag, "nestsFlag": nestsFlag, "hasContent": hasContent, "occursIn": occursIn, "printedFlag": printedFlag, "closed": closed, "deprecatedFlag": deprecatedFlag, "description": description, "nameEnglish": nameEnglish } combinedMarkerDict[marker] = marker if numberableFlag: # We have some extra work to do conversionDict[marker] = marker + '1' for suffix in ( '1234'): # These are the suffix digits that we allow numberedMarker = marker + suffix backConversionDict[numberedMarker] = marker numberedMarkerList.append(numberedMarker) combinedMarkerDict[numberedMarker] = marker if marker in newlineMarkersList: numberedNewlineMarkersList.append(numberedMarker) combinedNewlineMarkersList.append(numberedMarker) else: numberedInternalMarkersList.append(numberedMarker) combinedInternalMarkersList.append(numberedMarker) if deprecatedFlag: deprecatedMarkersList.append(numberedMarker) else: # it's not numberable numberedMarkerList.append(marker) if marker in newlineMarkersList: numberedNewlineMarkersList.append(marker) else: numberedInternalMarkersList.append(marker) if deprecatedFlag: deprecatedMarkersList.append(marker) #print( conversionDict ); print( backConversionDict ) #print( "newlineMarkersList", len(newlineMarkersList), newlineMarkersList ) #print( "numberedNewlineMarkersList", len(numberedNewlineMarkersList), numberedNewlineMarkersList ) #print( "combinedNewlineMarkersList", len(combinedNewlineMarkersList), combinedNewlineMarkersList ) #print( "internalMarkersList", len(internalMarkersList), internalMarkersList ) #print( "deprecatedMarkersList", len(deprecatedMarkersList), deprecatedMarkersList ) self.__DataDicts = { "rawMarkerDict": rawMarkerDict, "numberedMarkerList": numberedMarkerList, "combinedMarkerDict": combinedMarkerDict, "conversionDict": conversionDict, "backConversionDict": backConversionDict, "newlineMarkersList": newlineMarkersList, "numberedNewlineMarkersList": numberedNewlineMarkersList, "combinedNewlineMarkersList": combinedNewlineMarkersList, "internalMarkersList": internalMarkersList, "numberedInternalMarkersList": numberedInternalMarkersList, "combinedInternalMarkersList": combinedInternalMarkersList, "noteMarkersList": noteMarkersList, "deprecatedMarkersList": deprecatedMarkersList, } return self.__DataDicts # Just delete any of the dictionaries that you don't need # end of importDataToPython def pickle(self, filepath=None): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert (self._XMLtree) self.importDataToPython() assert (self.__DataDicts) if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/") if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + "_Tables.pickle") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) with open(filepath, 'wb') as myFile: pickle.dump(self.__DataDicts, myFile) # end of pickle def exportDataToPython(self, filepath=None): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDict(theFile, theDict, dictName, keyComment, fieldsComment): """Exports theDict to theFile.""" assert (isinstance(theDict, dict)) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len(theDict[dictKey]) if isinstance( theDict[dictKey], (tuple, dict, list)) else 1 break # We only check the first (random) entry we get theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment)) for dictKey in sorted(theDict.keys()): theFile.write(' {}: {},\n'.format(repr(dictKey), repr(theDict[dictKey]))) theFile.write("}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict def exportPythonOrderedDict(theFile, theDict, dictName, keyComment, fieldsComment): """Exports theDict to theFile.""" assert (isinstance(theDict, OrderedDict)) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len(theDict[dictKey]) if isinstance( theDict[dictKey], (tuple, dict, list)) else 1 break # We only check the first (random) entry we get theFile.write( '{} = OrderedDict([\n # Key is {}\n # Fields ({}) are: {}\n' .format(dictName, keyComment, fieldsCount, fieldsComment)) for dictKey in theDict.keys(): theFile.write(' ({}, {}),\n'.format(repr(dictKey), repr(theDict[dictKey]))) theFile.write("]), # end of {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict def exportPythonList(theFile, theList, listName, dummy, fieldsComment): """Exports theList to theFile.""" assert (isinstance(theList, list)) fieldsCount = len(theList[0]) if isinstance( theList[0], (tuple, dict, list)) else 1 theFile.write('{} = [\n # Fields ({}) are: {}\n'.format( listName, fieldsCount, fieldsComment)) for j, entry in enumerate(theList): theFile.write(' {}, # {}\n'.format(repr(entry), j)) theFile.write("], # end of {} ({} entries)\n\n".format( listName, len(theList))) # end of exportPythonList assert (self._XMLtree) self.importDataToPython() assert (self.__DataDicts) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.py") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) with open(filepath, 'wt') as myFile: myFile.write("# {}\n#\n".format(filepath)) myFile.write( "# This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n#\n" .format(ProgVersion, datetime.now())) if self.titleString: myFile.write("# {} data\n".format(self.titleString)) if self.ProgVersion: myFile.write("# Version: {}\n".format(self.ProgVersion)) if self.dateString: myFile.write("# Date: {}\n#\n".format(self.dateString)) myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag)) myFile.write("from collections import OrderedDict\n\n") dictInfo = { "rawMarkerDict": (exportPythonOrderedDict, "rawMarker (in the original XML order)", "specified"), "numberedMarkerList": (exportPythonList, "marker", "rawMarker"), "combinedMarkerDict": (exportPythonDict, "marker", "rawMarker"), "conversionDict": (exportPythonDict, "rawMarker", "numberedMarker"), "backConversionDict": (exportPythonDict, "numberedMarker", "rawMarker"), "newlineMarkersList": (exportPythonList, "", "rawMarker"), "numberedNewlineMarkersList": (exportPythonList, "", "rawMarker"), "combinedNewlineMarkersList": (exportPythonList, "", "rawMarker"), "internalMarkersList": (exportPythonList, "", "rawMarker"), "numberedInternalMarkersList": (exportPythonList, "", "rawMarker"), "combinedInternalMarkersList": (exportPythonList, "", "rawMarker"), "noteMarkersList": (exportPythonList, "", "rawMarker"), "deprecatedMarkersList": (exportPythonList, "", "rawMarker") } for dictName in self.__DataDicts: exportFunction, keyComment, fieldsComment = dictInfo[dictName] exportFunction(myFile, self.__DataDicts[dictName], dictName, keyComment, fieldsComment) myFile.write("# end of {}".format(os.path.basename(filepath))) # end of exportDataToPython def exportDataToJSON(self, filepath=None): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert (self._XMLtree) self.importDataToPython() assert (self.__DataDicts) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.json") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) with open(filepath, 'wt') as myFile: json.dump(self.__DataDicts, myFile, indent=2) # end of exportDataToJSON def exportDataToC(self, filepath=None): """ Writes the information tables to a .h and .c files that can be included in c and c++ programs. NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically. """ def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy, structure): """ Exports theDict to the .h and .c files. """ def convertEntry(entry): """ Convert special characters in an entry... """ result = "" if isinstance(entry, tuple): for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type '{}' in entry '{}'" ).format(field, entry)) elif isinstance(entry, dict): for key in sorted(entry.keys()): field = entry[key] if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type '{}' in entry '{}'" ).format(field, entry)) else: logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry))) return result # end of convertEntry for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count break # We only check the first (random) entry we get #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) ) hFile.write("typedef struct {}EntryStruct {{\n".format(dictName)) for declaration in structure.split(';'): adjDeclaration = declaration.strip() if adjDeclaration: hFile.write(" {};\n".format(adjDeclaration)) hFile.write("}} {}Entry;\n\n".format(dictName)) cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n" .format(dictName, dictName, len(theDict), fieldsCount, structure, sortedBy)) for dictKey in sorted(theDict.keys()): if isinstance(dictKey, str): cFile.write(" {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) elif isinstance(dictKey, int): cFile.write(" {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) else: logging.error( _("Can't handle this type of key data yet: {}").format( dictKey)) cFile.write("]}}; // {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict assert (self._XMLtree) self.importDataToPython() assert (self.__DataDicts) raise Exception("C export not written yet, sorry.") if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables") hFilepath = filepath + '.h' cFilepath = filepath + '.c' if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format( cFilepath)) # Don't bother telling them about the .h file ifdefName = self._filenameBase.upper() + "_Tables_h" with open(hFilepath, 'wt') as myHFile, open(cFilepath, 'wt') as myCFile: myHFile.write("// {}\n//\n".format(hFilepath)) myCFile.write("// {}\n//\n".format(cFilepath)) lines = "// This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n//\n".format( ProgVersion, datetime.now()) myHFile.write(lines) myCFile.write(lines) if self.titleString: lines = "// {} data\n".format(self.titleString) myHFile.write(lines) myCFile.write(lines) if self.ProgVersion: lines = "// Version: {}\n".format(self.ProgVersion) myHFile.write(lines) myCFile.write(lines) if self.dateString: lines = "// Date: {}\n//\n".format(self.dateString) myHFile.write(lines) myCFile.write(lines) myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag)) myHFile.write("\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName)) myCFile.write('#include "{}"\n\n'.format( os.path.basename(hFilepath))) CHAR = "const unsigned char" BYTE = "const int" dictInfo = { "referenceNumberDict": ("referenceNumber (integer 1..255)", "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];" .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "rawMarkerDict": ("marker", "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;" .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "CCELDict": ("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];". format(CHAR, BYTE, CHAR)), "SBLDict": ("SBLAbbreviation", "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];". format(CHAR, BYTE, CHAR)), "EnglishNameDict": ("nameEnglish", "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format( CHAR, BYTE, CHAR)) } for dictName, dictData in self.__DataDicts.items(): exportPythonDict(myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1]) myHFile.write("#endif // {}\n\n".format(ifdefName)) myHFile.write("// end of {}".format(os.path.basename(hFilepath))) myCFile.write("// end of {}".format(os.path.basename(cFilepath)))
class BibleOrganizationalSystemsConverter: """ Class for handling and converting BibleOrganizationalSystems. """ def __init__(self): """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = "BibleOrganizationalSystems" # These fields are used for parsing the XML self._treeTag = "BibleOrganizationalSystems" self._headerTag = "header" self._mainElementTag = "BibleOrganizationalSystem" # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = ("type", ) self._optionalAttributes = () self._uniqueAttributes = () self._compulsoryElements = ( "referenceAbbreviation", "languageCode", ) self._optionalElements = ( "name", "completionDate", "publicationDate", "copyright", "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem", "translator", "publisher", "derivedFrom", "usesText", "includesBooks", "url", "comment", ) self._uniqueElements = () self._allowedMultiple = ( "name", "translator", "derivedFrom", "usesText", "url", "comment", ) # These are fields that we will fill later self.title, self.version, self.date = None, None, None self.header, self._XMLtree = None, None self.__dataDicts = None # Get the data tables that we need for proper checking self._ISOLanguages = ISO_639_3_Languages().loadData() self._BibleBookOrderSystems = BibleBookOrderSystems().loadData() self._BiblePunctuationSystems = BiblePunctuationSystems().loadData() self._BibleVersificationSystems = BibleVersificationSystems().loadData( ) self._BibleBooksNamesSystems = BibleBooksNamesSystems().loadData() # end of BibleOrganizationalSystemsConverter.__init__ def __str__(self): """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ result = "" if self.title: result += ('\n' if result else '') + self.title if self.version: result += ('\n' if result else '') + " Version: {}".format( self.version) if self.date: result += ('\n' if result else '') + " Date: {}".format(self.date) result += ('\n' if result else '') + " Number of entries = {}".format( len(self._XMLtree)) return result # end of BibleOrganizationalSystemsConverter.__str__ def __len__(self): """ Returns the number of items loaded. """ return len(self._XMLtree) # end of BibleOrganizationalSystemsConverter.__len__ def loadAndValidate(self, XMLFilepath=None): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml") # Relative to module, not cwd self._load(XMLFilepath) if Globals.strictCheckingFlag: self._validate() return self # end of BibleOrganizationalSystemsConverter.loadAndValidate def _load(self, XMLFilepath): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert (XMLFilepath) self.__XMLFilepath = XMLFilepath assert (self._XMLtree is None or len(self._XMLtree) == 0 ) # Make sure we're not doing this twice if Globals.verbosityLevel > 2: print( _("Loading BibleOrganisationalSystems XML file from '{}'..."). format(self.__XMLFilepath)) self._XMLtree = ElementTree().parse(self.__XMLFilepath) assert (self._XMLtree) # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.header = header self._XMLtree.remove(header) if len(header) > 1: logging.info(_("Unexpected elements in header")) elif len(header) == 0: logging.info(_("Missing work element in header")) else: work = header[0] if work.tag == "work": self.version = work.find("version").text self.date = work.find("date").text self.title = work.find("title").text else: logging.warning(_("Missing work element in header")) else: logging.warning( _("Missing header element (looking for '{}' tag)").format( self._headerTag)) else: logging.error( _("Expected to load '{}' but got '{}'").format( self._treeTag, self._XMLtree.tag)) # end of BibleOrganizationalSystemsConverter._load def _validate(self): """ Check/validate the loaded data. """ assert (self._XMLtree) uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_" + elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_" + attributeName] = [] expectedID = 1 for j, element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get(attributeName) if attributeValue is None: logging.error( _("Compulsory '{}' attribute is missing from {} element in record {}" ).format(attributeName, element.tag, j)) if not attributeValue: logging.warning( _("Compulsory '{}' attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional '{}' attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get(attributeName) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional '{}' attribute ('{}') found on {} element in record {}" ).format(attributeName, attributeValue, element.tag, j)) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_" + attributeName]: logging.error( _("Found '{}' data repeated in '{}' field on {} element in record {}" ).format(attributeValue, attributeName, element.tag, j)) uniqueDict["Attribute_" + attributeName].append(attributeValue) ID = element.find("referenceAbbreviation").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find(elementName) is None: logging.error( _("Compulsory '{}' element is missing in record with ID '{}' (record {})" ).format(elementName, ID, j)) elif not element.find(elementName).text: logging.warning( _("Compulsory '{}' element is blank in record with ID '{}' (record {})" ).format(elementName, ID, j)) # Check optional elements for elementName in self._optionalElements: if element.find(elementName) is not None: if not element.find(elementName).text: logging.warning( _("Optional '{}' element is blank in record with ID '{}' (record {})" ).format(elementName, ID, j)) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional '{}' element ('{}') found in record with ID '{}' (record {})" ).format(subelement.tag, subelement.text, ID, j)) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find(elementName) is not None: text = element.find(elementName).text if text in uniqueDict["Element_" + elementName]: logging.error( _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {})" ).format(text, elementName, ID, j)) uniqueDict["Element_" + elementName].append(text) # Special checks of particular fields if element.find("includesBooks") is not None: bookList = element.find("includesBooks").text.split() for BBB in bookList: if not Globals.BibleBooksCodes.isValidReferenceAbbreviation( BBB): logging.critical( _("Unrecognized '{}' Bible book code found in 'includesBooks' in record with ID '{}' (record {})" ).format(BBB, ID, j)) if bookList.count(BBB) > 1: logging.error( _("Multiple '{}' Bible book codes found in 'includesBooks' in record with ID '{}' (record {})" ).format(BBB, ID, j)) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j)) # end of BibleOrganizationalSystemsConverter._validate def importDataToPython(self): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) """ assert (self._XMLtree) if self.__dataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__dataDicts # We'll create a number of dictionaries with different elements as the key dataDict, indexDict, combinedIndexDict = {}, {}, {} for element in self._XMLtree: bits = {} # Get the required information out of the tree for this element # Start with the compulsory elements and type attribute referenceAbbreviation = element.find("referenceAbbreviation").text bits["referenceAbbreviation"] = referenceAbbreviation myType = element.get("type") bits["type"] = myType if myType not in allowedTypes: logging.error( _("Unrecognized '{}' type for '{}' (expected one of {})"). format(myType, referenceAbbreviation, allowedTypes)) languageCode = element.find("languageCode").text if self._ISOLanguages and not self._ISOLanguages.isValidLanguageCode( languageCode): # Check that we have a valid language code logging.error( "Unrecognized '{}' ISO-639-3 language code in '{}' organisational system" .format(languageCode, referenceAbbreviation)) bits["languageCode"] = languageCode # Now work on the optional elements for name in ("name", "publicationDate", "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem", "derivedFrom", "usesText", "includesBooks"): for nameData in element.findall(name): if name in self._allowedMultiple: # Put multiple entries into a list if name not in bits: bits[name] = [nameData.text] else: bits[name].append(nameData.text) else: # Not allowed multiples if name in bits: logging.error( _("Unexpected multiple {} elements found in {} {}" ).format(name, referenceAbbreviation, myType)) if name == "includesBooks": # special handling bits["includesBooks"] = nameData.text.split() for BBB in bits["includesBooks"]: if not Globals.BibleBooksCodes.isValidReferenceAbbreviation( BBB): logging.error( _("Unrecognized '{}' Bible book code found in 'includesBooks' in {} {}" ).format(BBB, referenceAbbreviation, myType)) else: bits[name] = nameData.text # normal handling extension = '_' + myType extendedRA = referenceAbbreviation if referenceAbbreviation.endswith( extension) else (referenceAbbreviation + extension) dataDict[extendedRA] = bits if referenceAbbreviation in indexDict: indexDict[referenceAbbreviation].append(extendedRA) else: indexDict[referenceAbbreviation] = [extendedRA] if referenceAbbreviation in combinedIndexDict: combinedIndexDict[referenceAbbreviation].append(extendedRA) else: combinedIndexDict[referenceAbbreviation] = [extendedRA] if extendedRA != referenceAbbreviation: #assert( extendedRA not in combinedIndexDict ) if extendedRA in combinedIndexDict: logging.error( _("Found {} in combinedIndexDict").format(extendedRA)) combinedIndexDict[extendedRA] = [extendedRA] assert (len(indexDict) <= len(dataDict)) assert (len(combinedIndexDict) >= len(indexDict)) if Globals.strictCheckingFlag: # We'll do quite a bit more cross-checking now for extendedReferenceAbbreviation, data in dataDict.items(): #print( extendedReferenceAbbreviation, data ) systemType = data['type'] if systemType == 'edition': if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' '{}'").format( extendedReferenceAbbreviation, data['derivedFrom'])) if 'usesText' not in data: logging.error( _("{} doesn't specify 'usesText'").format( extendedReferenceAbbreviation)) else: # have a 'usesText' list for textAbbrev in data['usesText']: if textAbbrev not in indexDict: logging.error( _("{} specifies unknown '{}' text in 'usesText' field" ).format(extendedReferenceAbbreviation, textAbbrev)) elif len(indexDict[textAbbrev] ) > 1: # it could be ambiguous found = 0 for thisType in ( 'revision', 'translation', 'original'): # but not 'edition' usesTextExtended = textAbbrev + '_' + thisType if usesTextExtended in dataDict: foundOne = usesTextExtended found += 1 assert (found > 0) if found == 1: # ah, it's not actually ambiguous if Globals.verbosityLevel > 2: print( _("Adjusted text used for {} from the ambiguous '{}' to the extended name '{}'" ). format( extendedReferenceAbbreviation, textAbbrev, foundOne)) data['usesText'].remove(textAbbrev) data['usesText'].append(foundOne) else: logging.warning( _("{} specifies ambiguous '{}' (could be {}) texts in 'usesText' field" ).format( extendedReferenceAbbreviation, textAbbrev, indexDict[textAbbrev])) elif systemType == 'revision': if 'derivedFrom' not in data: logging.error( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation)) else: for df in data['derivedFrom']: if df not in indexDict: logging.error( _("{} specifies unknown '{}' text in 'derivedFrom' field" ).format(extendedReferenceAbbreviation, df)) elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous '{}' (could be {}) texts in 'derivedFrom' field" ).format(extendedReferenceAbbreviation, df, indexDict[df])) elif systemType == 'translation': if 'derivedFrom' not in data: logging.warning( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation)) else: for df in data['derivedFrom']: if df not in indexDict: logging.error( _("{} specifies unknown '{}' text in 'derivedFrom' field" ).format(extendedReferenceAbbreviation, df)) elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous '{}' (could be {}) texts in 'derivedFrom' field" ).format(extendedReferenceAbbreviation, df, indexDict[df])) elif systemType == 'original': if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' '{}'").format( extendedReferenceAbbreviation, data['derivedFrom'])) if 'versificationSystem' in data and data[ 'versificationSystem'] not in ('None', 'Unknown'): if not self._BibleVersificationSystems.isValidVersificationSystemName( data['versificationSystem']): extra = "\n Available systems are {}".format( self._BibleVersificationSystems. getAvailableVersificationSystemNames( )) if Globals.verbosityLevel > 2 else '' logging.error( _("Unknown '{}' versification system name in {}{}" ).format(data['versificationSystem'], extendedReferenceAbbreviation, extra)) if 'punctuationSystem' in data and data[ 'punctuationSystem'] not in ('None', 'Unknown'): if not self._BiblePunctuationSystems.isValidPunctuationSystemName( data['punctuationSystem']): extra = "\n Available systems are {}".format( self._BiblePunctuationSystems. getAvailablePunctuationSystemNames( )) if Globals.verbosityLevel > 2 else '' logging.error( _("Unknown '{}' punctuation system name in {}{}"). format(data['punctuationSystem'], extendedReferenceAbbreviation, extra)) self.__dataDicts = dataDict, indexDict, combinedIndexDict return self.__dataDicts # end of importDataToPython def pickle(self, filepath=None): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert (self._XMLtree) self.importDataToPython() assert (self.__dataDicts) if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/") if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + "_Tables.pickle") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) with open(filepath, 'wb') as myFile: pickle.dump(self.__dataDicts, myFile) # end of pickle def exportDataToPython(self, filepath=None): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDict(theFile, theDict, dictName, keyComment, fieldsComment): """Exports theDict to theFile.""" theFile.write( "{} = {{\n # Key is {}\n # Fields are: {}\n".format( dictName, keyComment, fieldsComment)) for dictKey in sorted(theDict.keys()): theFile.write(' {}: {},\n'.format(repr(dictKey), theDict[dictKey])) theFile.write("}}\n# end of {}\n\n".format(dictName)) # end of exportPythonDict assert (self._XMLtree) self.importDataToPython() assert (self.__dataDicts) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.py") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) dataDict, indexDict, combinedIndexDict = self.importDataToPython() with open(filepath, 'wt') as myFile: myFile.write("# {}\n#\n".format(filepath)) myFile.write( "# This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n#\n" .format(ProgVersion, datetime.now())) if self.title: myFile.write("# {}\n".format(self.title)) if self.version: myFile.write("# Version: {}\n".format(self.version)) if self.date: myFile.write("# Date: {}\n#\n".format(self.date)) myFile.write( "# {} {} entries loaded from the original XML file.\n". format(len(self._XMLtree), self._treeTag)) #myFile.write( "# {} {} loaded from the original XML files.\n#\n\n".format( len(self.systems), self._treeTag ) ) exportPythonDict( myFile, dataDict, "dataDict", "extendedReferenceAbbreviation", "referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, indexDict, "indexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, combinedIndexDict, "combinedIndexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) # end of exportDataToPython def exportDataToJSON(self, filepath=None): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert (self._XMLtree) self.importDataToPython() assert (self.__dataDicts) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.json") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) with open(filepath, 'wt') as myFile: #myFile.write( "# {}\n#\n".format( filepath ) ) # Not sure yet if these comment fields are allowed in JSON #myFile.write( "# This UTF-8 file was automatically generated by BibleBooksCodes.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) ) #if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) ) #if self.ProgVersion: myFile.write( "# Version: {}\n".format( self.ProgVersion ) ) #if self.dateString: myFile.write( "# Date: {}\n#\n".format( self.dateString ) ) #myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) ) json.dump(self.__dataDicts, myFile, indent=2) #myFile.write( "\n\n# end of {}".format( os.path.basename(filepath) ) ) # end of exportDataToJSON def exportDataToC(self, filepath=None): """ Writes the information tables to a .h file that can be included in c and c++ programs. """ raise Exception("C export not written yet") def exportPythonDict(theFile, theDict, dictName, structName, fieldsComment): """Exports theDict to theFile.""" def convertEntry(entry): """Convert special characters in an entry...""" result = "" for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type '{}' in entry '{}'" ).format(field, entry)) return result theFile.write( "static struct {} {}[] = {\n // Fields are {}\n".format( structName, dictName, fieldsComment)) for entry in sorted(theDict.keys()): if isinstance(entry, str): theFile.write(" {\"{}\", {}},\n".format( entry, convertEntry(theDict[entry]))) elif isinstance(entry, int): theFile.write(" {{}, {}},\n".format( entry, convertEntry(theDict[entry]))) else: logging.error( _("Can't handle this type of data yet: {}").format( entry)) theFile.write("}; // {}\n\n".format(dictName)) # end of exportPythonDict assert (self._XMLtree) self.importDataToPython() assert (self.__dataDicts) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.h") if Globals.verbosityLevel > 1: print(_("Exporting to {}...").format(filepath)) IDDict, RADict, SBLDict, OADict, PADict, PNDict = self.importDataToPython( ) ifdefName = self._filenameBase.upper() + "_Tables_h" with open(filepath, 'wt') as myFile: myFile.write("// {}\n//\n".format(filepath)) myFile.write( "// This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n//\n" .format(ProgVersion, datetime.now())) if self.title: myFile.write("// {}\n".format(self.title)) if self.version: myFile.write("// Version: {}\n".format(self.version)) if self.date: myFile.write("// Date: {}\n//\n".format(self.date)) myFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag)) myFile.write("#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName)) exportPythonDict( myFile, IDDict, "IDDict", "{int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "id (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, RADict, "RADict", "{char* refAbbrev; int id; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "referenceAbbreviation (sorted), SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, SBLDict, "SBLDict", "{char* SBLAbbrev; int id; char* refAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "SBLAbbreviation (sorted), ReferenceAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, OADict, "OADict", "{char* OSISAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "OSISAbbreviation (sorted), ReferenceAbbreviation, SBLAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PADict, "PADict", "{char* PTAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTNum; char* EngName;}", "ParatextAbbreviation (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PNDict, "PNDict", "{char* PTNum; int id; char* PTAbbrev; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* EngName;}", "ParatextNumberString (sorted), ParatextAbbreviation, referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, id, nameEnglish (comment only)" ) myFile.write("#endif // {}\n".format(ifdefName))
class HaggaiXMLBible( Bible ): """ Class for reading, validating, and converting HaggaiXMLBible XML. """ XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}" treeTag = 'XMLBIBLE' infoTag = 'INFORMATION' bookTag = 'BIBLEBOOK' chapterTag = 'CHAPTER' captionTag = 'CAPTION' paragraphTag = 'PARAGRAPH' verseTag = 'VERSE' noteTag = 'NOTE' styleTag = 'STYLE' breakTag = 'BR' def __init__( self, sourceFolder, givenName, encoding='utf-8' ): """ Constructor: just sets up the Haggai Bible object. """ # Setup and initialise the base class first Bible.__init__( self ) self.objectNameString = 'Haggai XML Bible object' self.objectTypeString = 'Haggai' # Now we can set our object variables self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding self.sourceFilepath = os.path.join( self.sourceFolder, self.givenName ) self.tree = self.header = None # Will hold the XML data # Get the data tables that we need for proper checking #self.ISOLanguages = ISO_639_3_Languages().loadData() self.genericBOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' ) # Do a preliminary check on the readability of our file if not os.access( self.sourceFilepath, os.R_OK ): print( "HaggaiXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) ) self.name = self.givenName #if self.name is None: #pass # end of HaggaiXMLBible.__init__ def load( self ): """ Load a single source XML file and load book elements. """ if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) ) try: self.tree = ElementTree().parse( self.sourceFilepath ) except ParseError as err: logging.critical( exp("Loader parse error in xml file {}: {} {}").format( self.givenName, sys.exc_info()[0], err ) ) #loadErrors.append( exp("Loader parse error in xml file {}: {} {}").format( self.givenName, sys.exc_info()[0], err ) ) #self.addPriorityError( 100, C, V, _("Loader parse error in xml file {}: {}").format( self.givenName, err ) ) if BibleOrgSysGlobals.debugFlag: assert len ( self.tree ) # Fail here if we didn't load anything at all # Find the main (bible) container if self.tree.tag == HaggaiXMLBible.treeTag: location = "Haggai XML file" BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' ) BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' ) schema = name = status = BibleType = revision = version = lgid = None for attrib,value in self.tree.items(): if attrib == HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation': schema = value elif attrib == "biblename": name = value elif attrib == "lgid": lgid = value # In italian.xml this is set to "german" elif attrib == "status": status = value elif attrib == "type": BibleType = value elif attrib == "revision": revision = value elif attrib == 'version': version = value else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) ) if name: self.name = name if status: self.status = status if revision: self.revision = revision if version: self.version = version if self.tree[0].tag == 'INFORMATION': self.header = self.tree[0] self.tree.remove( self.header ) self.__validateAndExtractHeader() else: # Handle information records at the END of the file ix = len(self.tree) - 1 if self.tree[ix].tag == 'INFORMATION': self.header = self.tree[ix] self.tree.remove( self.header ) self.__validateAndExtractHeader() # Find the submain (book) containers for element in self.tree: if element.tag == HaggaiXMLBible.bookTag: sublocation = "book in " + location BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' ) self.__validateAndExtractBook( element ) else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) ) else: logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) ) self.doPostLoadProcessing() # end of HaggaiXMLBible.load def __validateAndExtractHeader( self ): """ Extracts information out of the header record, such as: <INFORMATION> <title>King James Version</title> <creator></creator> <subject>The Holy Bible</subject> <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description> <publisher>FREE BIBLE SOFTWARE GROUP</publisher> <contributors /> <date>2009-01-23</date> <type>Bible</type> <format>Haggai XML Bible Markup Language</format> <identifier>kjv</identifier> <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source> <language>ENG</language> <coverage>provide the Bible to the nations of the world</coverage> <rights>We believe that this Bible is found in the Public Domain.</rights> </INFORMATION> """ if BibleOrgSysGlobals.debugFlag: assert self.header location = 'Header' BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' ) BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' ) BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' ) # TODO: We probably need to rationalise some of the self.xxx stores for element in self.header: #print( "header", element.tag ) if element.tag == 'title': sublocation = "title in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.title = element.text elif element.tag == 'creator': sublocation = "creator in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.creator = element.text elif element.tag == 'subject': sublocation = "subject in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.subject = element.text elif element.tag == 'description': sublocation = "description in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.description = element.text elif element.tag == 'publisher': sublocation = "publisher in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.publisher = element.text elif element.tag == 'contributor': sublocation = "contributor in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'alj1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jjd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5gk78' ) if element.text: try: self.contributor = [ self.contributor, element.text ] # Put multiples into a list except AttributeError: self.contributor = element.text # Must be the first (and possibly only) one elif element.tag == 'contributors': sublocation = "contributors in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.contributors = element.text elif element.tag == 'date': sublocation = "date in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.date = element.text elif element.tag == 'type': sublocation = "type in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.documentType = element.text elif element.tag == 'format': sublocation = "format in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text if BibleOrgSysGlobals.debugFlag: assert element.text == 'Haggai XML Bible Markup Language' elif element.tag == 'identifier': sublocation = "identifier in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.identifier = element.text elif element.tag == 'source': sublocation = "source in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.source = element.text elif element.tag == 'language': sublocation = "language in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if BibleOrgSysGlobals.debugFlag: assert element.text self.language = element.text elif element.tag == 'coverage': sublocation = "coverage in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.coverage = element.text elif element.tag == 'rights': sublocation = "rights in {}".format( location ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' ) if element.text: self.rights = element.text else: logging.error( "Found unexpected {!r} tag in {}".format( element.tag, location ) ) # end of HaggaiXMLBible.__validateAndExtractHeader def __validateAndExtractBook( self, book ): """ Check/validate and extract book data from the given XML book record finding chapter subelements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") ) # Process the div attributes first BBB = bookName = bookShortName = bookNumber = None for attrib,value in book.items(): if attrib=="bnumber": bookNumber = value elif attrib=="bname": bookName = value elif attrib=="bsname": bookShortName = value else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) ) if bookNumber: try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber ) except KeyError: logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \ .format( bookNumber, bookName, bookShortName ) ) elif bookName: BBB = self.genericBOS.getBBBFromText( bookName ) if BBB: if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) ) thisBook = BibleBook( self, BBB ) thisBook.objectNameString = 'Haggai XML Bible Book object' thisBook.objectTypeString = 'Haggai' #thisBook.sourceFilepath = self.sourceFilepath for element in book: if element.tag == HaggaiXMLBible.captionTag: sublocation = "caption in {}".format( BBB ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'jhl6' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'jk21' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'kjh6' ) thisBook.addLine( 'mt', element.text ) elif element.tag == HaggaiXMLBible.chapterTag: sublocation = "chapter in {}".format( BBB ) BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' ) BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' ) self.__validateAndExtractChapter( BBB, thisBook, element ) else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, element.tag ) ) if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) ) self.stashBook( thisBook ) # end of HaggaiXMLBible.__validateAndExtractBook def __validateAndExtractChapter( self, BBB, thisBook, chapter ): """ Check/validate and extract chapter data from the given XML book record finding and saving chapter numbers and finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter…") ) # Process the chapter attributes first chapterNumber = numVerses = None for attrib,value in chapter.items(): if attrib=="cnumber": chapterNumber = value else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) ) if chapterNumber: #print( BBB, 'c', chapterNumber ) thisBook.addLine( 'c', chapterNumber ) else: logging.error( "Missing 'n' attribute in chapter element for {}".format( BBB ) ) for element in chapter: if element.tag == HaggaiXMLBible.paragraphTag: location = "paragraph in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.verseTag+'disabled': location = "verse in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms location = "caption in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' ) # Handle caption attributes vRef = None for attrib,value in element.items(): if attrib=="vref": vRef = value if BibleOrgSysGlobals.debugFlag: assert vRef == '1' else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert vRef vText = element.text if not vText: logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) ) if vText: # This is the main text of the caption #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) ) # end of HaggaiXMLBible.__validateAndExtractChapter def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ): """ Check/validate and extract paragraph data from the given XML book record finding and saving paragraphs and finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph…") ) location = "paragraph in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoAttributes( paragraph, location, 'brgw3' ) BibleOrgSysGlobals.checkXMLNoText( paragraph, location, 'brgw3' ) BibleOrgSysGlobals.checkXMLNoTail( paragraph, location, 'brgw3' ) thisBook.addLine( 'p', '' ) # Handle verse subelements (verses) for element in paragraph: if element.tag == HaggaiXMLBible.verseTag: location = "verse in {} {}".format( BBB, chapterNumber ) self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element ) elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms location = "caption in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' ) # Handle caption attributes vRef = None for attrib,value in element.items(): if attrib=="vref": vRef = value if BibleOrgSysGlobals.debugFlag: assert vRef == '1' else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert vRef vText = element.text if not vText: logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) ) if vText: # This is the main text of the caption #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) ) # end of HaggaiXMLBible.__validateAndExtractParagraph def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ): """ Check/validate and extract verse data from the given XML book record finding and saving verse elements. """ if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse…") ) location = "verse in {} {}".format( BBB, chapterNumber ) BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' ) # Handle verse attributes verseNumber = toVerseNumber = None for attrib,value in verse.items(): if attrib=="vnumber": verseNumber = value else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert verseNumber location = "{}:{}".format( location, verseNumber ) # Get a better location description #thisBook.addLine( 'v', verseNumber ) vText = '' if verse.text is None else verse.text if vText: vText = vText.strip() #if not vText: # This happens if a verse starts immediately with a style or note #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) ) # Handle verse subelements (notes and styled portions) for subelement in verse: if subelement.tag == HaggaiXMLBible.noteTag: sublocation = "note in " + location noteType = None for attrib,value in subelement.items(): if attrib=="type": noteType = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if noteType and noteType not in ('variant',): logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) ) nText, nTail = subelement.text, subelement.tail #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) ) vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText ) if nTail: if '\n' in nTail: print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) ) nTail = nTail.replace( '\n', ' ' ) vText += nTail for subsubelement in subelement: if subsubelement.tag == HaggaiXMLBible.styleTag: subsublocation = "style in " + sublocation BibleOrgSysGlobals.checkXMLNoSubelements( subsubelement, subsublocation, 'fyt4' ) fs = css = idStyle = None for attrib,value in subsubelement.items(): if attrib=='fs': fs = value #elif attrib=="css": css = value #elif attrib=="id": idStyle = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subsubelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert fs or css or idStyle SFM = None if fs == 'italic': SFM = '\\it' elif fs == 'super': SFM = '\\bdit' elif fs == 'emphasis': SFM = '\\em' else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt #if css == "font-style:italic": SFM = '\\it' #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' #elif css == "color:#FF0000": SFM = '\\em' #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' #elif css is None and idStyle=='cl:divineName': SFM = '\\nd' #else: print( "css is", css, "idStyle is", idStyle ); halt sText, sTail = subsubelement.text.strip(), subsubelement.tail if BibleOrgSysGlobals.debugFlag: assert sText if SFM: vText += SFM+' ' + sText + SFM+'*' else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) ) elif subelement.tag == HaggaiXMLBible.styleTag: sublocation = "style in " + location BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' ) fs = css = idStyle = None for attrib,value in subelement.items(): if attrib=="fs": fs = value #elif attrib=="css": css = value #elif attrib=="id": idStyle = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert fs SFM = None if fs == 'super': SFM = '\\bdit' elif fs == 'emphasis': SFM = '\\em' else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt #if css == "font-style:italic": SFM = '\\it' #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit' #elif css == "color:#FF0000": SFM = '\\em' #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add' #elif css is None and idStyle=='cl:divineName': SFM = '\\nd' #else: print( "css is", css, "idStyle is", idStyle ); halt sText, sTail = subelement.text.strip(), subelement.tail if BibleOrgSysGlobals.debugFlag: assert sText #print( BBB, chapterNumber, sublocation ) if SFM: vText += SFM+' ' + sText + SFM+'*' else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles if sTail: vText += sTail.strip() elif subelement.tag == HaggaiXMLBible.breakTag: sublocation = "line break in " + location BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' ) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' ) art = None for attrib,value in subelement.items(): if attrib=="art": art = value else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) ) if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl' #print( BBB, chapterNumber, verseNumber ) #assert vText if vText: thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None vText = '' thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' ) #bTail = subelement.tail #if bTail: vText = bTail.strip() else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) ) if vText: # This is the main text of the verse (follows the verse milestone) if '\n' in vText: print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) ) vText = vText.replace( '\n', ' ' ) thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
class USFM3MarkersConverter: """ Class for reading, validating, and converting USFM3Markers. This is only intended as a transitory class (used at start-up). The USFM3Markers class has functions more generally useful. """ def __init__( self ) -> None: # We can't give this parameters because of the singleton """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = 'USFM3Markers' # These fields are used for parsing the XML self._treeTag = 'USFM3Markers' self._headerTag = 'header' self._mainElementTag = 'USFMMarker' # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = () self._optionalAttributes = () self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes self._compulsoryElements = ( 'nameEnglish', 'marker', 'compulsory', 'level', 'highestNumberSuffix', 'nests', 'hasContent', 'printed', 'closed', 'occursIn', 'deprecated', ) self._optionalElements = ('description', ) #self._uniqueElements = self._compulsoryElements + self.optionalElements self._uniqueElements = ( 'nameEnglish', 'marker', ) # These are fields that we will fill later self._XMLheader, self._XMLTree = None, None self.__DataDicts = {} # Used for import self.titleString = self.PROGRAM_VERSION = self.dateString = '' # end of __init__ def loadAndValidate(self, XMLFileOrFilepath=None): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLTree is None: # We mustn't have already have loaded the data if XMLFileOrFilepath is None: # XMLFileOrFilepath = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '.xml' ) # Relative to module, not cwd import importlib.resources # From Python 3.7 onwards -- handles zipped resources also XMLFileOrFilepath = importlib.resources.open_text( 'BibleOrgSys.DataFiles', self._filenameBase + '.xml') self.__load(XMLFileOrFilepath) if BibleOrgSysGlobals.strictCheckingFlag: self.__validate() else: # The data must have been already loaded if XMLFileOrFilepath is not None and XMLFileOrFilepath != self.__XMLFileOrFilepath: logging.error( _("Bible books codes are already loaded -- your different filepath of {!r} was ignored" ).format(XMLFileOrFilepath)) return self # end of loadAndValidate def __load(self, XMLFileOrFilepath): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert XMLFileOrFilepath self.__XMLFileOrFilepath = XMLFileOrFilepath assert self._XMLTree is None or len( self._XMLTree) == 0 # Make sure we're not doing this twice vPrint( 'Info', debuggingThisModule, _("Loading USFM3Markers XML file from {!r}…").format( self.__XMLFileOrFilepath)) self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath) assert self._XMLTree # Fail here if we didn't load anything at all if self._XMLTree.tag == self._treeTag: header = self._XMLTree[0] if header.tag == self._headerTag: self.XMLheader = header self._XMLTree.remove(header) BibleOrgSysGlobals.checkXMLNoText(header, 'header') BibleOrgSysGlobals.checkXMLNoTail(header, 'header') BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header') if len(header) > 1: logging.info(_("Unexpected elements in header")) elif len(header) == 0: logging.info(_("Missing work element in header")) else: work = header[0] BibleOrgSysGlobals.checkXMLNoText(work, "work in header") BibleOrgSysGlobals.checkXMLNoTail(work, "work in header") BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header") if work.tag == "work": self.PROGRAM_VERSION = work.find('version').text self.dateString = work.find('date').text self.titleString = work.find('title').text else: logging.warning(_("Missing work element in header")) else: logging.warning( _("Missing header element (looking for {!r} tag)".format( self._headerTag))) if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( element.tail)) else: logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLTree.tag)) # end of __load def __validate(self): """ Check/validate the loaded data. """ assert self._XMLTree uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_" + elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_" + attributeName] = [] expectedID = 1 for j, element in enumerate(self._XMLTree): if element.tag == self._mainElementTag: BibleOrgSysGlobals.checkXMLNoText(element, element.tag) BibleOrgSysGlobals.checkXMLNoTail(element, element.tag) if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag) if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag) # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get(attributeName) if attributeValue is None: logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}" ).format(attributeName, element.tag, j)) if not attributeValue: logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional {!r} attribute is blank on {} element in record {}" ).format(attributeName, element.tag, j)) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get(attributeName) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}" ).format(attributeName, attributeValue, element.tag, j)) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get(attributeName) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_" + attributeName]: logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}" ).format(attributeValue, attributeName, element.tag, j)) uniqueDict["Attribute_" + attributeName].append(attributeValue) # Get the marker to use as a record ID marker = element.find("marker").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find(elementName) is None: logging.error( _("Compulsory {!r} element is missing in record with marker {!r} (record {})" ).format(elementName, marker, j)) elif not element.find(elementName).text: logging.warning( _("Compulsory {!r} element is blank in record with marker {!r} (record {})" ).format(elementName, marker, j)) # Check optional elements for elementName in self._optionalElements: if element.find(elementName) is not None: if not element.find(elementName).text: logging.warning( _("Optional {!r} element is blank in record with marker {!r} (record {})" ).format(elementName, marker, j)) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional {!r} element ({!r}) found in record with marker {!r} (record {})" ).format(subelement.tag, subelement.text, marker, j)) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find(elementName) is not None: text = element.find(elementName).text if text in uniqueDict["Element_" + elementName]: logging.error( _("Found {!r} data repeated in {!r} element in record with marker {!r} (record {})" ).format(text, elementName, marker, j)) uniqueDict["Element_" + elementName].append(text) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j)) if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}" ).format(element.tail, element.tag, j)) if self._XMLTree.tail is not None and self._XMLTree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLTree.tail, self._XMLTree.tag)) # end of __validate def __str__(self) -> str: """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ indent = 2 result = "USFM3MarkersConverter object" if self.titleString: result += ('\n' if result else '') + ' ' * indent + _("Title: {}").format( self.titleString) if self.PROGRAM_VERSION: result += ('\n' if result else '') + ' ' * indent + _("Version: {}").format( self.PROGRAM_VERSION) if self.dateString: result += ('\n' if result else '' ) + ' ' * indent + _("Date: {}").format(self.dateString) if self._XMLTree is not None: result += ('\n' if result else '') + ' ' * indent + _( "Number of entries = {:,}").format(len(self._XMLTree)) return result # end of __str__ def __len__(self): """ Returns the number of SFM markers loaded. """ return len(self._XMLTree) # end of __len__ def importDataToPython(self): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLTree if you prefer.) """ assert self._XMLTree if self.__DataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__DataDicts # Load and validate entries and create the dictionaries and lists # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, … rawMarkerDict, numberedMarkerList, combinedMarkerDict, = {}, [], {} conversionDict, backConversionDict = {}, {} newlineMarkersList, numberedNewlineMarkersList, combinedNewlineMarkersList = [], [], [] internalMarkersList, numberedInternalMarkersList, combinedInternalMarkersList = [], [], [] noteMarkersList, deprecatedMarkersList = [], [] for element in self._XMLTree: # Get the required information out of the tree for this element # Start with the compulsory elements nameEnglish = element.find( 'nameEnglish' ).text # This name is really just a comment element #dPrint( 'Quiet', debuggingThisModule, "Processing", nameEnglish ) marker = element.find('marker').text if marker.lower() != marker: logging.error( _("Marker {!r} should be lower case").format(marker)) compulsory = element.find('compulsory').text if compulsory not in ('Yes', 'No'): logging.error( _("Unexpected {!r} compulsory field for marker {!r}"). format(compulsory, marker)) level = element.find('level').text compulsoryFlag = compulsory == 'Yes' if level == 'Newline': newlineMarkersList.append(marker) combinedNewlineMarkersList.append(marker) elif level == 'Internal': internalMarkersList.append(marker) elif level == 'Note': noteMarkersList.append(marker) else: logging.error( _("Unexpected {!r} level field for marker {!r}").format( level, marker)) highestNumberSuffix = element.find('highestNumberSuffix').text if highestNumberSuffix not in ('None', '3', '4', '5', '6', '7', '8', '9'): logging.error( _("Unexpected {!r} highestNumberSuffix field for marker {!r}" ).format(highestNumberSuffix, marker)) numberableFlag = highestNumberSuffix != 'None' if numberableFlag and level == 'Character': logging.error( _("Unexpected {!r} highestNumberSuffix field for character marker {!r}" ).format(highestNumberSuffix, marker)) nests = element.find("nests").text if nests not in ('Yes', 'No'): logging.error( _("Unexpected {!r} nests field for marker {!r}").format( nests, marker)) nestsFlag = nests == 'Yes' hasContent = element.find('hasContent').text if hasContent not in ('Always', 'Never', 'Sometimes'): logging.error( _("Unexpected {!r} hasContent field for marker {!r}"). format(hasContent, marker)) printed = element.find('printed').text if printed not in ('Yes', 'No'): logging.error( _("Unexpected {!r} printed field for marker {!r}").format( printed, marker)) printedFlag = printed == 'Yes' closed = element.find('closed').text if closed not in ('No', 'Always', 'Self', 'Optional'): logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker)) occursIn = element.find('occursIn').text if occursIn not in ('Header', 'Introduction', 'Numbering', 'Text', 'Canonical Text', 'Poetry', 'Text, Poetry', 'Acrostic verse', 'Table row', 'Footnote', 'Cross-reference', 'Front and back matter'): logging.error( _("Unexpected {!r} occursIn field for marker {!r}").format( occursIn, marker)) deprecated = element.find('deprecated').text if deprecated not in ('Yes', 'No'): logging.error( _("Unexpected {!r} deprecated field for marker {!r}"). format(deprecated, marker)) deprecatedFlag = deprecated == 'Yes' # The optional elements are set to None if they don't exist #closed = None if element.find("closed") is None else element.find("closed").text #if closed is not None and closed not in ( "No", "Always", "Optional" ): logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker ) ) #if level=="Character" and closed is None: logging.error( _("Entry for character marker {!r} doesn't have a \"closed\" field").format( marker ) ) description = None if element.find( 'description') is None else element.find('description').text if description is not None: assert description # Now put it into my dictionaries and lists for easy access # The marker is lowercase by definition if 'marker' in self._uniqueElements: assert marker not in rawMarkerDict # Shouldn't be any duplicates rawMarkerDict[marker] = { 'compulsoryFlag': compulsoryFlag, 'level': level, 'highestNumberSuffix': highestNumberSuffix, 'nestsFlag': nestsFlag, 'hasContent': hasContent, 'occursIn': occursIn, 'printedFlag': printedFlag, 'closed': closed, 'deprecatedFlag': deprecatedFlag, 'description': description, 'nameEnglish': nameEnglish } combinedMarkerDict[marker] = marker if highestNumberSuffix != 'None': # We have some extra work to do if marker.endswith('-s') or marker.endswith('-e'): assert marker in ('qt-s', 'qt-e' ) # Only ones we know of so far # Numberical suffix can't just be appended to the end of these conversionDict[marker] = f'{marker[:-2]}1{marker[-2:]}' else: # not a milestone start/end marker conversionDict[marker] = marker + '1' for suffix in range( 1, int(highestNumberSuffix) + 1): # These are the suffix digits that we allow if marker.endswith('-s') or marker.endswith('-e'): # Numberical suffix can't just be appended to the end of these numberedMarker = f'{marker[:-2]}{suffix}{marker[-2:]}' #dPrint( 'Quiet', debuggingThisModule, f"Marker '{marker}' led to '{numberedMarker}'" ) else: # not a milestone start/end marker numberedMarker = marker + str(suffix) backConversionDict[numberedMarker] = marker numberedMarkerList.append(numberedMarker) combinedMarkerDict[numberedMarker] = marker if marker in newlineMarkersList: numberedNewlineMarkersList.append(numberedMarker) combinedNewlineMarkersList.append(numberedMarker) else: numberedInternalMarkersList.append(numberedMarker) combinedInternalMarkersList.append(numberedMarker) if deprecatedFlag: deprecatedMarkersList.append(numberedMarker) else: # it's not numberable numberedMarkerList.append(marker) if marker in newlineMarkersList: numberedNewlineMarkersList.append(marker) else: numberedInternalMarkersList.append(marker) if deprecatedFlag: deprecatedMarkersList.append(marker) #dPrint( 'Quiet', debuggingThisModule, conversionDict ); vPrint( 'Quiet', debuggingThisModule, backConversionDict ) #dPrint( 'Quiet', debuggingThisModule, "newlineMarkersList", len(newlineMarkersList), newlineMarkersList ) #dPrint( 'Quiet', debuggingThisModule, "numberedNewlineMarkersList", len(numberedNewlineMarkersList), numberedNewlineMarkersList ) #dPrint( 'Quiet', debuggingThisModule, "combinedNewlineMarkersList", len(combinedNewlineMarkersList), combinedNewlineMarkersList ) #dPrint( 'Quiet', debuggingThisModule, "internalMarkersList", len(internalMarkersList), internalMarkersList ) #dPrint( 'Quiet', debuggingThisModule, "deprecatedMarkersList", len(deprecatedMarkersList), deprecatedMarkersList ) self.__DataDicts = { "rawMarkerDict": rawMarkerDict, "numberedMarkerList": numberedMarkerList, "combinedMarkerDict": combinedMarkerDict, "conversionDict": conversionDict, "backConversionDict": backConversionDict, "newlineMarkersList": newlineMarkersList, "numberedNewlineMarkersList": numberedNewlineMarkersList, "combinedNewlineMarkersList": combinedNewlineMarkersList, "internalMarkersList": internalMarkersList, "numberedInternalMarkersList": numberedInternalMarkersList, "combinedInternalMarkersList": combinedInternalMarkersList, "noteMarkersList": noteMarkersList, "deprecatedMarkersList": deprecatedMarkersList, } return self.__DataDicts # Just delete any of the dictionaries that you don't need # end of importDataToPython def pickle(self, filepath=None): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert self._XMLTree self.importDataToPython() assert self.__DataDicts if not filepath: folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH if not os.path.exists(folder): os.mkdir(folder) filepath = os.path.join(folder, self._filenameBase + '_Tables.pickle') vPrint('Quiet', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wb') as myFile: pickle.dump(self.__DataDicts, myFile) # end of pickle def exportDataToPython(self, filepath=None): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDict(theFile, theDict, dictName, keyComment, fieldsComment): """Exports theDict to theFile.""" assert isinstance(theDict, dict) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len(theDict[dictKey]) if isinstance( theDict[dictKey], (tuple, dict, list)) else 1 break # We only check the first (random) entry we get theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment)) for dictKey in sorted(theDict.keys()): theFile.write(' {}: {},\n'.format(repr(dictKey), repr(theDict[dictKey]))) theFile.write("}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict #def exportPythonOrderedDict( theFile, theDict, dictName, keyComment, fieldsComment ): #"""Exports theDict to theFile.""" #assert isinstance( theDict, OrderedDict ) #for dictKey in theDict.keys(): # Have to iterate this :( #fieldsCount = len( theDict[dictKey] ) if isinstance( theDict[dictKey], (tuple,dict,list) ) else 1 #break # We only check the first (random) entry we get #theFile.write( '{} = OrderedDict([\n # Key is {}\n # Fields ({}) are: {}\n'.format( dictName, keyComment, fieldsCount, fieldsComment ) ) #for dictKey in theDict.keys(): #theFile.write( ' ({}, {}),\n'.format( repr(dictKey), repr(theDict[dictKey]) ) ) #theFile.write( "]), # end of {} ({} entries)\n\n".format( dictName, len(theDict) ) ) ## end of exportPythonOrderedDict def exportPythonList(theFile, theList, listName, dummy, fieldsComment): """Exports theList to theFile.""" assert isinstance(theList, list) fieldsCount = len(theList[0]) if isinstance( theList[0], (tuple, dict, list)) else 1 theFile.write('{} = [\n # Fields ({}) are: {}\n'.format( listName, fieldsCount, fieldsComment)) for j, entry in enumerate(theList): theFile.write(' {}, # {}\n'.format(repr(entry), j)) theFile.write("], # end of {} ({} entries)\n\n".format( listName, len(theList))) # end of exportPythonList assert self._XMLTree self.importDataToPython() assert self.__DataDicts if not filepath: filepath = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '_Tables.py') vPrint('Quiet', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wt', encoding='utf-8') as myFile: myFile.write("# {}\n#\n".format(filepath)) myFile.write( "# This UTF-8 file was automatically generated by USFM3Markers.py V{} on {}\n#\n" .format(PROGRAM_VERSION, datetime.now())) if self.titleString: myFile.write("# {} data\n".format(self.titleString)) if self.PROGRAM_VERSION: myFile.write("# Version: {}\n".format(self.PROGRAM_VERSION)) if self.dateString: myFile.write("# Date: {}\n#\n".format(self.dateString)) myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLTree), self._treeTag)) #myFile.write( "from collections import OrderedDict\n\n" ) dictInfo = { "rawMarkerDict": (exportPythonDict, "rawMarker (in the original XML order)", "specified"), "numberedMarkerList": (exportPythonList, "marker", "rawMarker"), "combinedMarkerDict": (exportPythonDict, "marker", "rawMarker"), "conversionDict": (exportPythonDict, "rawMarker", "numberedMarker"), "backConversionDict": (exportPythonDict, "numberedMarker", "rawMarker"), "newlineMarkersList": (exportPythonList, "", "rawMarker"), "numberedNewlineMarkersList": (exportPythonList, "", "rawMarker"), "combinedNewlineMarkersList": (exportPythonList, "", "rawMarker"), "internalMarkersList": (exportPythonList, "", "rawMarker"), "numberedInternalMarkersList": (exportPythonList, "", "rawMarker"), "combinedInternalMarkersList": (exportPythonList, "", "rawMarker"), "noteMarkersList": (exportPythonList, "", "rawMarker"), "deprecatedMarkersList": (exportPythonList, "", "rawMarker") } for dictName in self.__DataDicts: exportFunction, keyComment, fieldsComment = dictInfo[dictName] exportFunction(myFile, self.__DataDicts[dictName], dictName, keyComment, fieldsComment) myFile.write("# end of {}".format(os.path.basename(filepath))) # end of exportDataToPython def exportDataToJSON(self, filepath=None): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert self._XMLTree self.importDataToPython() assert self.__DataDicts if not filepath: filepath = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '_Tables.json') vPrint('Quiet', debuggingThisModule, _("Exporting to {}…").format(filepath)) with open(filepath, 'wt', encoding='utf-8') as myFile: json.dump(self.__DataDicts, myFile, ensure_ascii=False, indent=2) # end of exportDataToJSON def exportDataToC(self, filepath=None): """ Writes the information tables to a .h and .c files that can be included in c and c++ programs. NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically. """ def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy, structure): """ Exports theDict to the .h and .c files. """ def convertEntry(entry): """ Convert special characters in an entry… """ result = "" if isinstance(entry, tuple): for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}" ).format(field, entry)) elif isinstance(entry, dict): for key in sorted(entry.keys()): field = entry[key] if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance(field, str): result += '"' + str(field).replace('"', '\\"') + '"' elif isinstance(field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}" ).format(field, entry)) else: logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry))) return result # end of convertEntry for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count break # We only check the first (random) entry we get #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) ) hFile.write("typedef struct {}EntryStruct {{\n".format(dictName)) for declaration in structure.split(';'): adjDeclaration = declaration.strip() if adjDeclaration: hFile.write(" {};\n".format(adjDeclaration)) hFile.write("}} {}Entry;\n\n".format(dictName)) cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n" .format(dictName, dictName, len(theDict), fieldsCount, structure, sortedBy)) for dictKey in sorted(theDict.keys()): if isinstance(dictKey, str): cFile.write(" {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) elif isinstance(dictKey, int): cFile.write(" {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]))) else: logging.error( _("Can't handle this type of key data yet: {}").format( dictKey)) cFile.write("]}}; // {} ({} entries)\n\n".format( dictName, len(theDict))) # end of exportPythonDict assert self._XMLTree self.importDataToPython() assert self.__DataDicts raise Exception("C export not written yet, sorry.") if not filepath: filepath = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '_Tables') hFilepath = filepath + '.h' cFilepath = filepath + '.c' vPrint('Quiet', debuggingThisModule, _("Exporting to {}…").format( cFilepath)) # Don't bother telling them about the .h file ifdefName = self._filenameBase.upper() + "_Tables_h" with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \ open( cFilepath, 'wt', encoding='utf-8' ) as myCFile: myHFile.write("// {}\n//\n".format(hFilepath)) myCFile.write("// {}\n//\n".format(cFilepath)) lines = "// This UTF-8 file was automatically generated by USFM3Markers.py V{} on {}\n//\n".format( PROGRAM_VERSION, datetime.now()) myHFile.write(lines) myCFile.write(lines) if self.titleString: lines = "// {} data\n".format(self.titleString) myHFile.write(lines) myCFile.write(lines) if self.PROGRAM_VERSION: lines = "// Version: {}\n".format(self.PROGRAM_VERSION) myHFile.write(lines) myCFile.write(lines) if self.dateString: lines = "// Date: {}\n//\n".format(self.dateString) myHFile.write(lines) myCFile.write(lines) myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLTree), self._treeTag)) myHFile.write("\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName)) myCFile.write('#include "{}"\n\n'.format( os.path.basename(hFilepath))) CHAR = "const unsigned char" BYTE = "const int" dictInfo = { "referenceNumberDict": ("referenceNumber (integer 1..255)", "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];" .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "rawMarkerDict": ("marker", "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;" .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR)), "CCELDict": ("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];". format(CHAR, BYTE, CHAR)), "SBLDict": ("SBLAbbreviation", "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];". format(CHAR, BYTE, CHAR)), "EnglishNameDict": ("nameEnglish", "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format( CHAR, BYTE, CHAR)) } for dictName, dictData in self.__DataDicts.items(): exportPythonDict(myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1]) myHFile.write("#endif // {}\n\n".format(ifdefName)) myHFile.write("// end of {}".format(os.path.basename(hFilepath))) myCFile.write("// end of {}".format(os.path.basename(cFilepath)))
class USFMMarkersConverter: """ Class for reading, validating, and converting USFMMarkers. This is only intended as a transitory class (used at start-up). The USFMMarkers class has functions more generally useful. """ def __init__( self ): # We can't give this parameters because of the singleton """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = "USFMMarkers" # These fields are used for parsing the XML self._treeTag = "USFMMarkers" self._headerTag = "header" self._mainElementTag = "USFMMarker" # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = () self._optionalAttributes = () self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes self._compulsoryElements = ( "nameEnglish", "marker", "compulsory", "level", "numberable", "nests", "hasContent", "printed", "closed", "occursIn", "deprecated", ) self._optionalElements = ( "description", ) #self._uniqueElements = self._compulsoryElements + self.optionalElements self._uniqueElements = ( "nameEnglish", "marker", ) # These are fields that we will fill later self._XMLheader, self._XMLtree = None, None self.__DataDicts = {} # Used for import self.titleString = self.ProgVersion = self.dateString = '' # end of __init__ def loadAndValidate( self, XMLFilepath=None ): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd self.__load( XMLFilepath ) if BibleOrgSysGlobals.strictCheckingFlag: self.__validate() else: # The data must have been already loaded if XMLFilepath is not None and XMLFilepath!=self.__XMLFilepath: logging.error( _("Bible books codes are already loaded -- your different filepath of {!r} was ignored").format( XMLFilepath ) ) return self # end of loadAndValidate def __load( self, XMLFilepath ): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert XMLFilepath self.__XMLFilepath = XMLFilepath assert self._XMLtree is None or len(self._XMLtree)==0 # Make sure we're not doing this twice if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading USFMMarkers XML file from {!r}…").format( self.__XMLFilepath ) ) self._XMLtree = ElementTree().parse( self.__XMLFilepath ) assert self._XMLtree # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.XMLheader = header self._XMLtree.remove( header ) BibleOrgSysGlobals.checkXMLNoText( header, "header" ) BibleOrgSysGlobals.checkXMLNoTail( header, "header" ) BibleOrgSysGlobals.checkXMLNoAttributes( header, "header" ) if len(header)>1: logging.info( _("Unexpected elements in header") ) elif len(header)==0: logging.info( _("Missing work element in header") ) else: work = header[0] BibleOrgSysGlobals.checkXMLNoText( work, "work in header" ) BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" ) BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" ) if work.tag == "work": self.ProgVersion = work.find('version').text self.dateString = work.find("date").text self.titleString = work.find("title").text else: logging.warning( _("Missing work element in header") ) else: logging.warning( _("Missing header element (looking for {!r} tag)".format( self._headerTag ) ) ) if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( element.tail ) ) else: logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) ) # end of __load def __validate( self ): """ Check/validate the loaded data. """ assert self._XMLtree uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = [] expectedID = 1 for j,element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: BibleOrgSysGlobals.checkXMLNoText( element, element.tag ) BibleOrgSysGlobals.checkXMLNoTail( element, element.tag ) if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag ) if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag ) # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get( attributeName ) if attributeValue is None: logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) ) if not attributeValue: logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get( attributeName ) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) ) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_"+attributeName]: logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) ) uniqueDict["Attribute_"+attributeName].append( attributeValue ) # Get the marker to use as a record ID marker = element.find("marker").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find( elementName ) is None: logging.error( _("Compulsory {!r} element is missing in record with marker {!r} (record {})").format( elementName, marker, j ) ) elif not element.find( elementName ).text: logging.warning( _("Compulsory {!r} element is blank in record with marker {!r} (record {})").format( elementName, marker, j ) ) # Check optional elements for elementName in self._optionalElements: if element.find( elementName ) is not None: if not element.find( elementName ).text: logging.warning( _("Optional {!r} element is blank in record with marker {!r} (record {})").format( elementName, marker, j ) ) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional {!r} element ({!r}) found in record with marker {!r} (record {})").format( subelement.tag, subelement.text, marker, j ) ) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find( elementName ) is not None: text = element.find( elementName ).text if text in uniqueDict["Element_"+elementName]: logging.error( _("Found {!r} data repeated in {!r} element in record with marker {!r} (record {})").format( text, elementName, marker, j ) ) uniqueDict["Element_"+elementName].append( text ) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) ) if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}").format( element.tail, element.tag, j ) ) if self._XMLtree.tail is not None and self._XMLtree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) ) # end of __validate def __str__( self ): """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ indent = 2 result = "USFMMarkersConverter object" if self.titleString: result += ('\n' if result else '') + ' '*indent + _("Title: {}").format( self.titleString ) if self.ProgVersion: result += ('\n' if result else '') + ' '*indent + _("Version: {}").format( self.ProgVersion ) if self.dateString: result += ('\n' if result else '') + ' '*indent + _("Date: {}").format( self.dateString ) if self._XMLtree is not None: result += ('\n' if result else '') + ' '*indent + _("Number of entries = {}").format( len(self._XMLtree) ) return result # end of __str__ def __len__( self ): """ Returns the number of SFM markers loaded. """ return len( self._XMLtree ) # end of __len__ def importDataToPython( self ): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) """ assert self._XMLtree if self.__DataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__DataDicts # Load and validate entries and create the dictionaries and lists # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, … rawMarkerDict, numberedMarkerList, combinedMarkerDict, = OrderedDict(), [], {} conversionDict, backConversionDict = {}, {} newlineMarkersList, numberedNewlineMarkersList, combinedNewlineMarkersList = [], [], [] internalMarkersList, numberedInternalMarkersList, combinedInternalMarkersList = [], [], [] noteMarkersList, deprecatedMarkersList = [], [] for element in self._XMLtree: # Get the required information out of the tree for this element # Start with the compulsory elements nameEnglish = element.find('nameEnglish').text # This name is really just a comment element marker = element.find('marker').text if marker.lower() != marker: logging.error( _("Marker {!r} should be lower case").format( marker ) ) compulsory = element.find('compulsory').text if compulsory not in ( 'Yes', 'No' ): logging.error( _("Unexpected {!r} compulsory field for marker {!r}").format( compulsory, marker ) ) level = element.find('level').text compulsoryFlag = compulsory == 'Yes' if level == 'Newline': newlineMarkersList.append( marker ); combinedNewlineMarkersList.append( marker ) elif level == 'Internal': internalMarkersList.append( marker ) elif level == 'Note': noteMarkersList.append( marker ) else: logging.error( _("Unexpected {!r} level field for marker {!r}").format( level, marker ) ) numberable = element.find('numberable').text if numberable not in ( 'Yes', 'No' ): logging.error( _("Unexpected {!r} numberable field for marker {!r}").format( numberable, marker ) ) numberableFlag = numberable == "Yes" if numberableFlag and level == "Character": logging.error( _("Unexpected {!r} numberable field for character marker {!r}").format( numberable, marker ) ) nests = element.find("nests").text if nests not in ( 'Yes', 'No' ): logging.error( _("Unexpected {!r} nests field for marker {!r}").format( nests, marker ) ) nestsFlag = nests == 'Yes' hasContent = element.find('hasContent').text if hasContent not in ( 'Always', 'Never', 'Sometimes' ): logging.error( _("Unexpected {!r} hasContent field for marker {!r}").format( hasContent, marker ) ) printed = element.find('printed').text if printed not in ( 'Yes', 'No' ): logging.error( _("Unexpected {!r} printed field for marker {!r}").format( printed, marker ) ) printedFlag = printed == 'Yes' closed = element.find('closed').text if closed not in ( 'No', 'Always', 'Optional' ): logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker ) ) occursIn = element.find('occursIn').text if occursIn not in ( 'Header', 'Introduction', 'Numbering', 'Text', 'Canonical Text', 'Poetry', 'Text, Poetry', 'Acrostic verse', 'Table row', 'Footnote', 'Cross-reference', 'Front and back matter' ): logging.error( _("Unexpected {!r} occursIn field for marker {!r}").format( occursIn, marker ) ) deprecated = element.find('deprecated').text if deprecated not in ( 'Yes', 'No' ): logging.error( _("Unexpected {!r} deprecated field for marker {!r}").format( deprecated, marker ) ) deprecatedFlag = deprecated == 'Yes' # The optional elements are set to None if they don't exist #closed = None if element.find("closed") is None else element.find("closed").text #if closed is not None and closed not in ( "No", "Always", "Optional" ): logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker ) ) #if level=="Character" and closed is None: logging.error( _("Entry for character marker {!r} doesn't have a \"closed\" field").format( marker ) ) description = None if element.find("description") is None else element.find("description").text if description is not None: assert description # Now put it into my dictionaries and lists for easy access # The marker is lowercase by definition if "marker" in self._uniqueElements: assert marker not in rawMarkerDict # Shouldn't be any duplicates rawMarkerDict[marker] = { "compulsoryFlag":compulsoryFlag, "level":level, "numberableFlag":numberableFlag, "nestsFlag":nestsFlag, "hasContent":hasContent, "occursIn":occursIn, "printedFlag":printedFlag, "closed":closed, "deprecatedFlag":deprecatedFlag, "description":description, "nameEnglish":nameEnglish } combinedMarkerDict[marker] = marker if numberableFlag: # We have some extra work to do conversionDict[marker] = marker + '1' for suffix in '1234': # These are the suffix digits that we allow numberedMarker = marker + suffix backConversionDict[numberedMarker] = marker numberedMarkerList.append( numberedMarker ) combinedMarkerDict[numberedMarker] = marker if marker in newlineMarkersList: numberedNewlineMarkersList.append( numberedMarker ); combinedNewlineMarkersList.append( numberedMarker ) else: numberedInternalMarkersList.append( numberedMarker ); combinedInternalMarkersList.append( numberedMarker ) if deprecatedFlag: deprecatedMarkersList.append( numberedMarker ) else: # it's not numberable numberedMarkerList.append( marker ) if marker in newlineMarkersList: numberedNewlineMarkersList.append( marker ) else: numberedInternalMarkersList.append( marker ) if deprecatedFlag: deprecatedMarkersList.append( marker ) #print( conversionDict ); print( backConversionDict ) #print( "newlineMarkersList", len(newlineMarkersList), newlineMarkersList ) #print( "numberedNewlineMarkersList", len(numberedNewlineMarkersList), numberedNewlineMarkersList ) #print( "combinedNewlineMarkersList", len(combinedNewlineMarkersList), combinedNewlineMarkersList ) #print( "internalMarkersList", len(internalMarkersList), internalMarkersList ) #print( "deprecatedMarkersList", len(deprecatedMarkersList), deprecatedMarkersList ) self.__DataDicts = { "rawMarkerDict":rawMarkerDict, "numberedMarkerList":numberedMarkerList, "combinedMarkerDict":combinedMarkerDict, "conversionDict":conversionDict, "backConversionDict":backConversionDict, "newlineMarkersList":newlineMarkersList, "numberedNewlineMarkersList":numberedNewlineMarkersList, "combinedNewlineMarkersList":combinedNewlineMarkersList, "internalMarkersList":internalMarkersList, "numberedInternalMarkersList":numberedInternalMarkersList, "combinedInternalMarkersList":combinedInternalMarkersList, "noteMarkersList":noteMarkersList, "deprecatedMarkersList":deprecatedMarkersList, } return self.__DataDicts # Just delete any of the dictionaries that you don't need # end of importDataToPython def pickle( self, filepath=None ): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert self._XMLtree self.importDataToPython() assert self.__DataDicts if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables.pickle" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wb' ) as myFile: pickle.dump( self.__DataDicts, myFile ) # end of pickle def exportDataToPython( self, filepath=None ): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ): """Exports theDict to theFile.""" assert isinstance( theDict, dict ) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) if isinstance( theDict[dictKey], (tuple,dict,list) ) else 1 break # We only check the first (random) entry we get theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment ) ) for dictKey in sorted(theDict.keys()): theFile.write( ' {}: {},\n'.format( repr(dictKey), repr(theDict[dictKey]) ) ) theFile.write( "}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict) ) ) # end of exportPythonDict def exportPythonOrderedDict( theFile, theDict, dictName, keyComment, fieldsComment ): """Exports theDict to theFile.""" assert isinstance( theDict, OrderedDict ) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) if isinstance( theDict[dictKey], (tuple,dict,list) ) else 1 break # We only check the first (random) entry we get theFile.write( '{} = OrderedDict([\n # Key is {}\n # Fields ({}) are: {}\n'.format( dictName, keyComment, fieldsCount, fieldsComment ) ) for dictKey in theDict.keys(): theFile.write( ' ({}, {}),\n'.format( repr(dictKey), repr(theDict[dictKey]) ) ) theFile.write( "]), # end of {} ({} entries)\n\n".format( dictName, len(theDict) ) ) # end of exportPythonDict def exportPythonList( theFile, theList, listName, dummy, fieldsComment ): """Exports theList to theFile.""" assert isinstance( theList, list ) fieldsCount = len( theList[0] ) if isinstance( theList[0], (tuple,dict,list) ) else 1 theFile.write( '{} = [\n # Fields ({}) are: {}\n'.format( listName, fieldsCount, fieldsComment ) ) for j,entry in enumerate(theList): theFile.write( ' {}, # {}\n'.format( repr(entry), j ) ) theFile.write( "], # end of {} ({} entries)\n\n".format( listName, len(theList) ) ) # end of exportPythonList assert self._XMLtree self.importDataToPython() assert self.__DataDicts if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.py" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wt', encoding='utf-8' ) as myFile: myFile.write( "# {}\n#\n".format( filepath ) ) myFile.write( "# This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) ) if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) ) if self.ProgVersion: myFile.write( "# Version: {}\n".format( self.ProgVersion ) ) if self.dateString: myFile.write( "# Date: {}\n#\n".format( self.dateString ) ) myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) ) myFile.write( "from collections import OrderedDict\n\n" ) dictInfo = { "rawMarkerDict":(exportPythonOrderedDict, "rawMarker (in the original XML order)","specified"), "numberedMarkerList":(exportPythonList, "marker","rawMarker"), "combinedMarkerDict":(exportPythonDict, "marker","rawMarker"), "conversionDict":(exportPythonDict, "rawMarker","numberedMarker"), "backConversionDict":(exportPythonDict, "numberedMarker","rawMarker"), "newlineMarkersList":(exportPythonList, "","rawMarker"), "numberedNewlineMarkersList":(exportPythonList, "","rawMarker"), "combinedNewlineMarkersList":(exportPythonList, "","rawMarker"), "internalMarkersList":(exportPythonList, "","rawMarker"), "numberedInternalMarkersList":(exportPythonList, "","rawMarker"), "combinedInternalMarkersList":(exportPythonList, "","rawMarker"), "noteMarkersList":(exportPythonList, "","rawMarker"), "deprecatedMarkersList":(exportPythonList, "","rawMarker") } for dictName in self.__DataDicts: exportFunction, keyComment, fieldsComment = dictInfo[dictName] exportFunction( myFile, self.__DataDicts[dictName], dictName, keyComment, fieldsComment ) myFile.write( "# end of {}".format( os.path.basename(filepath) ) ) # end of exportDataToPython def exportDataToJSON( self, filepath=None ): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert self._XMLtree self.importDataToPython() assert self.__DataDicts if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.json" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wt', encoding='utf-8' ) as myFile: json.dump( self.__DataDicts, myFile, indent=2 ) # end of exportDataToJSON def exportDataToC( self, filepath=None ): """ Writes the information tables to a .h and .c files that can be included in c and c++ programs. NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically. """ def exportPythonDict( hFile, cFile, theDict, dictName, sortedBy, structure ): """ Exports theDict to the .h and .c files. """ def convertEntry( entry ): """ Convert special characters in an entry… """ result = "" if isinstance( entry, tuple ): for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) ) elif isinstance( entry, dict ): for key in sorted(entry.keys()): field = entry[key] if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) ) else: logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry) ) ) return result # end of convertEntry for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count break # We only check the first (random) entry we get #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) ) hFile.write( "typedef struct {}EntryStruct {{\n".format( dictName ) ) for declaration in structure.split(';'): adjDeclaration = declaration.strip() if adjDeclaration: hFile.write( " {};\n".format( adjDeclaration ) ) hFile.write( "}} {}Entry;\n\n".format( dictName ) ) cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n".format( dictName, dictName, len(theDict), fieldsCount, structure, sortedBy ) ) for dictKey in sorted(theDict.keys()): if isinstance( dictKey, str ): cFile.write( " {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) ) elif isinstance( dictKey, int ): cFile.write( " {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) ) else: logging.error( _("Can't handle this type of key data yet: {}").format( dictKey ) ) cFile.write( "]}}; // {} ({} entries)\n\n".format( dictName, len(theDict) ) ) # end of exportPythonDict assert self._XMLtree self.importDataToPython() assert self.__DataDicts raise Exception( "C export not written yet, sorry." ) if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables" ) hFilepath = filepath + '.h' cFilepath = filepath + '.c' if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( cFilepath ) ) # Don't bother telling them about the .h file ifdefName = self._filenameBase.upper() + "_Tables_h" with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \ open( cFilepath, 'wt', encoding='utf-8' ) as myCFile: myHFile.write( "// {}\n//\n".format( hFilepath ) ) myCFile.write( "// {}\n//\n".format( cFilepath ) ) lines = "// This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n//\n".format( ProgVersion, datetime.now() ) myHFile.write( lines ); myCFile.write( lines ) if self.titleString: lines = "// {} data\n".format( self.titleString ) myHFile.write( lines ); myCFile.write( lines ) if self.ProgVersion: lines = "// Version: {}\n".format( self.ProgVersion ) myHFile.write( lines ); myCFile.write( lines ) if self.dateString: lines = "// Date: {}\n//\n".format( self.dateString ) myHFile.write( lines ); myCFile.write( lines ) myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) ) myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) ) myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) ) CHAR = "const unsigned char" BYTE = "const int" dictInfo = { "referenceNumberDict":("referenceNumber (integer 1..255)", "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];" .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ), "rawMarkerDict":("marker", "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;" .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ), "CCELDict":("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ), "SBLDict":("SBLAbbreviation", "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ), "EnglishNameDict":("nameEnglish", "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ) } for dictName,dictData in self.__DataDicts.items(): exportPythonDict( myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] ) myHFile.write( "#endif // {}\n\n".format( ifdefName ) ) myHFile.write( "// end of {}".format( os.path.basename(hFilepath) ) ) myCFile.write( "// end of {}".format( os.path.basename(cFilepath) ) )
class BibleReferencesLinksConverter: """ Class for reading, validating, and converting BibleReferencesLinks. This is only intended as a transitory class (used at start-up). The BibleReferencesLinks class has functions more generally useful. """ def __init__( self ): # We can't give this parameters because of the singleton """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = 'BibleReferencesLinks' # These fields are used for parsing the XML self._treeTag = 'BibleReferencesLinks' self._headerTag = 'header' self._mainElementTag = 'BibleReferenceLinks' # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = () self._optionalAttributes = () self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes self._compulsoryElements = ( 'sourceReference', 'sourceComponent', 'BibleReferenceLink', ) self._optionalElements = ( ) self._uniqueElements = ( 'sourceReference' ) # These are fields that we will fill later self._XMLheader, self._XMLtree = None, None self.__DataList = {} # Used for import self.titleString = self.ProgVersion = self.dateString = '' # end of BibleReferencesLinksConverter.__init__ def loadAndValidate( self, XMLFilepath=None ): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd self.__load( XMLFilepath ) if BibleOrgSysGlobals.strictCheckingFlag: self.__validate() else: # The data must have been already loaded if XMLFilepath is not None and XMLFilepath!=self.__XMLFilepath: logging.error( _("Bible references links are already loaded -- your different filepath of {!r} was ignored").format( XMLFilepath ) ) return self # end of BibleReferencesLinksConverter.loadAndValidate def __load( self, XMLFilepath ): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. """ assert XMLFilepath self.__XMLFilepath = XMLFilepath assert self._XMLtree is None or len(self._XMLtree)==0 # Make sure we're not doing this twice if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading BibleReferencesLinks XML file from {!r}…").format( self.__XMLFilepath ) ) self._XMLtree = ElementTree().parse( self.__XMLFilepath ) assert self._XMLtree # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.XMLheader = header self._XMLtree.remove( header ) BibleOrgSysGlobals.checkXMLNoText( header, "header" ) BibleOrgSysGlobals.checkXMLNoTail( header, "header" ) BibleOrgSysGlobals.checkXMLNoAttributes( header, "header" ) if len(header)>1: logging.info( _("Unexpected elements in header") ) elif len(header)==0: logging.info( _("Missing work element in header") ) else: work = header[0] BibleOrgSysGlobals.checkXMLNoText( work, "work in header" ) BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" ) BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" ) if work.tag == "work": self.ProgVersion = work.find('version').text self.dateString = work.find("date").text self.titleString = work.find("title").text else: logging.warning( _("Missing work element in header") ) else: logging.warning( _("Missing header element (looking for {!r} tag)".format( self._headerTag ) ) ) if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( header.tail ) ) else: logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) ) # end of BibleReferencesLinksConverter.__load def __validate( self ): """ Check/validate the loaded data. """ assert self._XMLtree uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = [] expectedID = 1 for j,element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: BibleOrgSysGlobals.checkXMLNoText( element, element.tag ) BibleOrgSysGlobals.checkXMLNoTail( element, element.tag ) if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag ) if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag ) # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get( attributeName ) if attributeValue is None: logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) ) if not attributeValue: logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get( attributeName ) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) ) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_"+attributeName]: logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) ) uniqueDict["Attribute_"+attributeName].append( attributeValue ) # Get the sourceComponent to use as a record ID ID = element.find("sourceComponent").text # Check compulsory elements for elementName in self._compulsoryElements: foundElement = element.find( elementName ) if foundElement is None: logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})").format( elementName, ID, j ) ) else: BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag ) BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag ) #BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag ) if not foundElement.text: logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) ) # Check optional elements for elementName in self._optionalElements: foundElement = element.find( elementName ) if foundElement is not None: BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag ) BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag ) BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag ) if not foundElement.text: logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) ) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})").format( subelement.tag, subelement.text, ID, j ) ) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find( elementName ) is not None: text = element.find( elementName ).text if text in uniqueDict["Element_"+elementName]: logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})").format( text, elementName, ID, j ) ) uniqueDict["Element_"+elementName].append( text ) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) ) if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}").format( element.tail, element.tag, j ) ) if self._XMLtree.tail is not None and self._XMLtree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) ) # end of BibleReferencesLinksConverter.__validate def __str__( self ): """ This method returns the string representation of a Bible book code. @return: the name of a Bible object formatted as a string @rtype: string """ indent = 2 result = "BibleReferencesLinksConverter object" if self.titleString: result += ('\n' if result else '') + ' '*indent + _("Title: {}").format( self.titleString ) if self.ProgVersion: result += ('\n' if result else '') + ' '*indent + _("Version: {}").format( self.ProgVersion ) if self.dateString: result += ('\n' if result else '') + ' '*indent + _("Date: {}").format( self.dateString ) if self._XMLtree is not None: result += ('\n' if result else '') + ' '*indent + _("Number of entries = {}").format( len(self._XMLtree) ) return result # end of BibleReferencesLinksConverter.__str__ def __len__( self ): """ Returns the number of references links loaded. """ return len( self._XMLtree ) # end of BibleReferencesLinksConverter.__len__ def importDataToPython( self ): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) """ def makeList( parameter1, parameter2 ): """ Returns a list containing all parameters. Parameter1 may already be a list. """ if isinstance( parameter1, list ): #assert parameter2 not in parameter1 parameter1.append( parameter2 ) return parameter1 else: return [ parameter1, parameter2 ] # end of makeList assert self._XMLtree if self.__DataList: # We've already done an import/restructuring -- no need to repeat it return self.__DataList, self.__DataDict # We'll create a number of dictionaries with different elements as the key rawRefLinkList = [] actualLinkCount = 0 for element in self._XMLtree: #print( BibleOrgSysGlobals.elementStr( element ) ) # Get these first for helpful error messages sourceReference = element.find('sourceReference').text sourceComponent = element.find('sourceComponent').text assert sourceComponent in ('Section','Verses','Verse',) BibleOrgSysGlobals.checkXMLNoText( element, sourceReference, 'kls1' ) BibleOrgSysGlobals.checkXMLNoAttributes( element, sourceReference, 'kd21' ) BibleOrgSysGlobals.checkXMLNoTail( element, sourceReference, 'so20' ) actualRawLinksList = [] for subelement in element: #print( BibleOrgSysGlobals.elementStr( subelement ) ) if subelement.tag in ( 'sourceReference','sourceComponent',): # already processed these BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'ls12' ) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sourceReference, 'ks02' ) BibleOrgSysGlobals.checkXMLNoTail( subelement, sourceReference, 'sqw1' ) elif subelement.tag == 'BibleReferenceLink': BibleOrgSysGlobals.checkXMLNoText( subelement, sourceReference, 'haw9' ) BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'hs19' ) BibleOrgSysGlobals.checkXMLNoTail( subelement, sourceReference, 'jsd9' ) targetReference = subelement.find('targetReference').text targetComponent = subelement.find('targetComponent').text assert targetComponent in ('Section','Verses','Verse',) linkType = subelement.find('linkType').text assert linkType in ('TSK','QuotedOTReference','AlludedOTReference','PossibleOTReference',) actualRawLinksList.append( (targetReference,targetComponent,linkType,) ) actualLinkCount += 1 rawRefLinkList.append( (sourceReference,sourceComponent,actualRawLinksList,) ) if BibleOrgSysGlobals.verbosityLevel > 1: print( " {} raw links loaded (with {} actual raw link entries)".format( len(rawRefLinkList), actualLinkCount ) ) myRefLinkList = [] actualLinkCount = 0 BOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' ) for j,(sourceReference,sourceComponent,actualRawLinksList) in enumerate( rawRefLinkList ): # Just do some testing first if sourceComponent == 'Verse': x = SimpleVerseKey( sourceReference ) else: flag = False try: x = SimpleVerseKey( sourceReference, ignoreParseErrors=True ) flag = True except TypeError: pass # This should happen coz it should fail the SVK if flag: logging.error( "{} {!r} failed!".format( sourceComponent, sourceReference ) ) raise TypeError # Now do the actual parsing parsedSourceReference = FlexibleVersesKey( sourceReference ) if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( j, sourceComponent, sourceReference, parsedSourceReference ) #assert parsedSourceReference.getShortText().replace(' ','_') == sourceReference actualLinksList = [] for k,(targetReference,targetComponent,linkType) in enumerate( actualRawLinksList ): # Just do some testing first if targetComponent == 'Verse': x = SimpleVerseKey( targetReference ) else: flag = False try: x = SimpleVerseKey( targetReference, ignoreParseErrors=True ) flag = True except TypeError: pass # This should happen coz it should fail the SVK if flag: logging.error( "{} {!r} failed!".format( targetComponent, targetReference ) ) raise TypeError # Now do the actual parsing try: parsedTargetReference = FlexibleVersesKey( targetReference ) except TypeError: print( " Temporarily ignored {!r} (TypeError from FlexibleVersesKey)".format( targetReference ) ) parsedTargetReference = None if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( ' ', targetComponent, targetReference, parsedTargetReference ) #assert parsedTargetReference.getShortText().replace(' ','_',1) == targetReference actualLinksList.append( (targetReference,targetComponent,parsedTargetReference,linkType,) ) actualLinkCount += 1 myRefLinkList.append( (sourceReference,sourceComponent,parsedSourceReference,actualLinksList,) ) if BibleOrgSysGlobals.verbosityLevel > 1: print( " {} links processed (with {} actual link entries)".format( len(rawRefLinkList), actualLinkCount ) ) #print( myRefLinkList ); halt self.__DataList = myRefLinkList # Now put it into my dictionaries for easy access # This part should be customized or added to for however you need to process the data # Create a link dictionary (by verse key) myRefLinkDict = {} for sourceReference,sourceComponent,parsedSourceReference,actualLinksList in myRefLinkList: #print( sourceReference, sourceComponent, parsedSourceReference ) #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList ) for verseRef in parsedSourceReference.getIncludedVerses(): #print( verseRef ) assert isinstance( verseRef, SimpleVerseKey ) if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = [] myRefLinkDict[verseRef].append( (sourceReference,sourceComponent,parsedSourceReference,actualLinksList,) ) #print( myRefLinkDict ); halt originalLinks = len( myRefLinkDict ) print( " {} verse links added to dictionary (includes filling out spans)".format( originalLinks ) ) #print( myRefLinkDict ); halt # Create a reversed link dictionary (by verse key) for sourceReference,sourceComponent,parsedSourceReference,actualLinksList in myRefLinkList: #print( sourceReference, sourceComponent, parsedSourceReference ) #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList ) for targetReference,targetComponent,parsedTargetReference,linkType in actualLinksList: if parsedTargetReference is not None: for verseRef in parsedTargetReference.getIncludedVerses(): #print( verseRef ) assert isinstance( verseRef, SimpleVerseKey ) if linkType == 'TSK': reverseLinkType = 'TSKQuoted' elif linkType == 'QuotedOTReference': reverseLinkType = 'OTReferenceQuoted' elif linkType == 'AlludedOTReference': reverseLinkType = 'OTReferenceAlluded' elif linkType == 'PossibleOTReference': reverseLinkType = 'OTReferencePossible' else: halt # Have a new linkType! if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = [] myRefLinkDict[verseRef].append( (targetReference,targetComponent,parsedTargetReference,[(sourceReference,sourceComponent,parsedSourceReference,reverseLinkType)]) ) #print( myRefLinkDict ); halt totalLinks = len( myRefLinkDict ) reverseLinks = totalLinks - originalLinks print( " {} reverse links added to dictionary to give {} total".format( reverseLinks, totalLinks ) ) #print( myRefLinkDict ); halt self.__DataDict = myRefLinkDict # Let's find the most number of references for a verse mostReferences = totalReferences = 0 for verseRef, entryList in self.__DataDict.items(): numRefs = len( entryList ) if numRefs > mostReferences: mostReferences, mostVerseRef = numRefs, verseRef totalReferences += numRefs print( " {} maximum links for any one reference ({})".format( mostReferences, mostVerseRef.getShortText() ) ) print( " {} total links for all references".format( totalReferences ) ) return self.__DataList, self.__DataDict # end of BibleReferencesLinksConverter.importDataToPython def pickle( self, filepath=None ): """ Writes the information tables to a .pickle file that can be easily loaded into a Python3 program. """ import pickle assert self._XMLtree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables.pickle" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wb' ) as myFile: pickle.dump( self.__DataList, myFile ) pickle.dump( self.__DataDict, myFile ) # end of BibleReferencesLinksConverter.pickle def exportDataWithIndex( self, filepath=None ): """ Writes the information tables to a .pickle index file and .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import pickle assert self._XMLtree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) indexFilepath = os.path.join( folder, self._filenameBase + "_Tables.index.pickle" ) dataFilepath = os.path.join( folder, self._filenameBase + "_Tables.data.pickle" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( dataFilepath ) ) index = {} filePosition = 0 with open( dataFilepath, 'wb' ) as myFile: for vKey,refList in self.__DataDict.items(): #print( "vKey", vKey, vKey.getVerseKeyText() ) #print( " ", refList ) length = myFile.write( pickle.dumps( refList ) ) #print( " ", filePosition, length ) assert vKey not in index index[vKey] = (filePosition, length ) filePosition += length with open( indexFilepath, 'wb' ) as myFile: pickle.dump( index, myFile ) # end of BibleReferencesLinksConverter.exportDataWithIndex def exportDataToPython( self, filepath=None ): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. """ def exportPythonDictOrList( theFile, theDictOrList, dictName, keyComment, fieldsComment ): """Exports theDictOrList to theFile.""" assert theDictOrList raise Exception( "Not written yet" ) for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) break # We only check the first (random) entry we get theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment ) ) for dictKey in sorted(theDict.keys()): theFile.write( ' {}: {},\n'.format( repr(dictKey), repr(theDict[dictKey]) ) ) theFile.write( "}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict) ) ) # end of exportPythonDictOrList assert self._XMLtree self.importDataToPython() assert self.__DataList assert self.__DataDict print( "Export to Python not written yet!" ) halt if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables.py" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wt', encoding='utf-8' ) as myFile: myFile.write( "# {}\n#\n".format( filepath ) ) myFile.write( "# This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) ) if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) ) if self.ProgVersion: myFile.write( "# Version: {}\n".format( self.ProgVersion ) ) if self.dateString: myFile.write( "# Date: {}\n#\n".format( self.dateString ) ) myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) ) mostEntries = "0=referenceNumber (integer 1..255), 1=sourceComponent/BBB (3-uppercase characters)" dictInfo = { "referenceNumberDict":("referenceNumber (integer 1..255)","specified"), "sourceComponentDict":("sourceComponent","specified"), "sequenceList":("sourceComponent/BBB (3-uppercase characters)",""), "initialAllAbbreviationsDict":("allAbbreviations", mostEntries) } for dictName,dictData in self.__DataList.items(): exportPythonDictOrList( myFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] ) myFile.write( "# end of {}".format( os.path.basename(filepath) ) ) # end of BibleReferencesLinksConverter.exportDataToPython def exportDataToJSON( self, filepath=None ): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ import json assert self._XMLtree self.importDataToPython() assert self.__DataList assert self.__DataDict if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables.json" ) if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) ) with open( filepath, 'wt', encoding='utf-8' ) as myFile: for something in self.__DataList: # temp for debugging ........................................... print( "Dumping something", something ) json.dump( something, myFile, indent=2 ) json.dump( self.__DataList, myFile, indent=2 ) for someKey,someItem in self.__DataDict.items(): # temp for debugging ........................................... print( "Dumping someKey", someKey ) json.dump( someKey, myFile, indent=2 ) print( "Dumping someItem", someItem ) json.dump( someItem, myFile, indent=2 ) json.dump( self.__DataDict, myFile, indent=2 ) # end of BibleReferencesLinksConverter.exportDataToJSON def exportDataToC( self, filepath=None ): """ Writes the information tables to a .h and .c files that can be included in c and c++ programs. NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically. """ def exportPythonDict( hFile, cFile, theDict, dictName, sortedBy, structure ): """ Exports theDict to the .h and .c files. """ def convertEntry( entry ): """ Convert special characters in an entry… """ result = "" if isinstance( entry, str ): result = entry elif isinstance( entry, tuple ): for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) elif isinstance( field, list): raise Exception( "Not written yet (list1)" ) else: logging.error( _("Cannot convert unknown field type {!r} in tuple entry {!r}").format( field, entry ) ) elif isinstance( entry, dict ): for key in sorted(entry.keys()): field = entry[key] if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) elif isinstance( field, list): raise Exception( "Not written yet (list2)" ) else: logging.error( _("Cannot convert unknown field type {!r} in dict entry {!r}").format( field, entry ) ) else: logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry) ) ) return result # end of convertEntry for dictKey in theDict.keys(): # Have to iterate this :( fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count break # We only check the first (random) entry we get #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) ) hFile.write( "typedef struct {}EntryStruct {{\n".format( dictName ) ) for declaration in structure.split(';'): adjDeclaration = declaration.strip() if adjDeclaration: hFile.write( " {};\n".format( adjDeclaration ) ) hFile.write( "}} {}Entry;\n\n".format( dictName ) ) cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n".format( dictName, dictName, len(theDict), fieldsCount, structure, sortedBy ) ) for dictKey in sorted(theDict.keys()): if isinstance( dictKey, str ): cFile.write( " {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) ) elif isinstance( dictKey, int ): cFile.write( " {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) ) else: logging.error( _("Can't handle this type of key data yet: {}").format( dictKey ) ) cFile.write( "]}}; // {} ({} entries)\n\n".format( dictName, len(theDict) ) ) # end of exportPythonDict assert self._XMLtree self.importDataToPython() assert self.__DataList print( "Export to C not written yet!" ) halt if not filepath: folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" ) if not os.path.exists( folder ): os.mkdir( folder ) filepath = os.path.join( folder, self._filenameBase + "_Tables" ) hFilepath = filepath + '.h' cFilepath = filepath + '.c' if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( cFilepath ) ) # Don't bother telling them about the .h file ifdefName = self._filenameBase.upper() + "_Tables_h" with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \ open( cFilepath, 'wt', encoding='utf-8' ) as myCFile: myHFile.write( "// {}\n//\n".format( hFilepath ) ) myCFile.write( "// {}\n//\n".format( cFilepath ) ) lines = "// This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n//\n".format( ProgVersion, datetime.now() ) myHFile.write( lines ); myCFile.write( lines ) if self.titleString: lines = "// {} data\n".format( self.titleString ) myHFile.write( lines ); myCFile.write( lines ) if self.ProgVersion: lines = "// Version: {}\n".format( self.ProgVersion ) myHFile.write( lines ); myCFile.write( lines ) if self.dateString: lines = "// Date: {}\n//\n".format( self.dateString ) myHFile.write( lines ); myCFile.write( lines ) myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) ) myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) ) myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) ) CHAR = "const unsigned char" BYTE = "const int" dictInfo = { "referenceNumberDict":("referenceNumber (integer 1..255)", "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} sourceComponent[3+1];" .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ), "sourceComponentDict":("sourceComponent", "{} sourceComponent[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks;" .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ), "sequenceList":("sequenceList",), "CCELDict":("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} sourceComponent[3+1];".format(CHAR,BYTE,CHAR) ), "initialAllAbbreviationsDict":("abbreviation", "{}* abbreviation; {} sourceComponent[3+1];".format(CHAR,CHAR) ) } for dictName,dictData in self.__DataList.items(): exportPythonDict( myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] ) myHFile.write( "#endif // {}\n\n".format( ifdefName ) ) myHFile.write( "// end of {}".format( os.path.basename(hFilepath) ) ) myCFile.write( "// end of {}".format( os.path.basename(cFilepath) ) )