def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
    """
    Check/validate and extract chapter data from the given XML chapter element,
        saving the chapter number and then each verse into thisBook.

    Parameters:
        BBB: book code -- only used here in log messages and location strings.
        thisBook: book object that receives the lines via addLine( marker, text ).
        chapter: XML chapter element whose attributes and verse subelements are read.

    A verse whose text contains newlines is saved as poetry:
        an empty 'q1' line, then the 'v' line with the first text line,
        then one 'q1' line for each remaining text line.

    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") )

    # Process the chapter attributes first
    chapterNumber = None
    for attrib,value in chapter.items():
        if attrib=="n":
            chapterNumber = value
        elif attrib=="VERSES":
            pass # Recognised (so that no warning is logged) but the value isn't used
        else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
    if chapterNumber:
        chapterNumber = chapterNumber.replace( 'of Solomon ', '' ) # Fix a mistake in the Chinese_SU module
        thisBook.addLine( 'c', chapterNumber )
    else: # This message needs a {} placeholder or else the book code is silently dropped
        logging.error( "Missing 'n' attribute in chapter element for {}".format( BBB ) )

    for element in chapter:
        if element.tag != OpenSongXMLBible.verseTag:
            logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.verseTag, element.tag ) )
            continue

        sublocation = "verse in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'l5ks' )
        BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5f7h' )
        verseNumber = None
        for attrib,value in element.items():
            if attrib=="n":
                verseNumber = value
            elif attrib=="t":
                pass # NOTE(review): looks like a "to" verse number for verse ranges -- it's recognised but not used
            else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert( verseNumber )
        if verseNumber is None: # Can't build a 'v' line -- concatenating None below would raise a TypeError
            logging.error( "Missing 'n' attribute in verse element in {} {}".format( BBB, chapterNumber ) )
            continue

        vText = element.text
        if not vText:
            logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )
            continue

        # This is the main text of the verse
        if '\n' in vText: # This is how they represent poetry
            for j, textBit in enumerate( vText.split( '\n' ) ):
                if j==0:
                    thisBook.addLine( 'q1', '' )
                    thisBook.addLine( 'v', verseNumber + ' ' + textBit )
                else: thisBook.addLine( 'q1', textBit )
        else: # Just one verse line
            thisBook.addLine( 'v', verseNumber + ' ' + vText )
def loadCharacterFormatting( self, element, location, BBB, C, V ):
    """
    Append a character-formatting element to the last line of self.thisBook.

    The element's tag is used as the marker: the opening marker plus the element's
        own text is appended first, then any footnote ('f') subelements are handed
        to self.loadFootnote(), and finally the closing marker is appended
        (followed by a space and the element's tail if it has one).

    Parameters:
        element: the XML element to process (checked for having no attributes).
        location: description of where the element is, used in log messages.
        BBB, C, V: book code, chapter and verse -- passed on to loadFootnote
            and used in the warning for unexpected subelements.

    Returns None.
    """
    marker, text, tail = element.tag, clean(element.text), clean(element.tail)
    BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'sd12' )
    # An element that starts with a subelement has no text of its own (element.text is None)
    #   and presumably clean() passes that through -- don't write the word 'None' into the line
    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text or '' ) )
    for subelement in element:
        sublocation = subelement.tag + " of " + location
        if subelement.tag == 'f':
            self.loadFootnote( subelement, sublocation, BBB, C, V )
        else:
            logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
            # 'halt' isn't defined in the visible code -- presumably a deliberate NameError to stop when debugging
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
    self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
def getMaximumPossibleFilenameTuples( self, strictCheck=False ):
    """
    Try each of the filename-finding methods and use whichever one finds the most files.

    The confirmed list is the starting choice. The external and then the internal
        possibilities only replace the current choice if they're strictly longer.

    If strictCheck is set (or BibleOrgSysGlobals.strictCheckingFlag is on),
        each file is also peeked into, and an entry is removed from the chosen list
        if its first line couldn't be read, is blank,
        or doesn't start with a backslash (after removing any BOM).

    The chosen list is saved in self.lastTupleList and returned.
        Each entry is a 2-tuple ( BBB, filename ) not including the folder path.
    """
    chosenName, chosenTuples = 'Confirmed', self.getConfirmedFilenameTuples()
    for candidateName, finder in ( ('External',self.getPossibleFilenameTuplesExt), ('Internal',self.getPossibleFilenameTuplesInt) ):
        candidateTuples = finder()
        if len(candidateTuples) > len(chosenTuples):
            chosenName, chosenTuples = candidateName, candidateTuples
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( "getMaximumPossibleFilenameTuples: using {}".format( chosenName ) )

    if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
        BOM = chr(65279) # U+FEFF or \ufeff
        for BBB,filename in list( chosenTuples ): # Loop through a copy coz entries get removed from the real list
            firstLine = BibleOrgSysGlobals.peekIntoFile( filename, self.givenFolderName )
            if firstLine is None: # seems we couldn't decode the file
                chosenTuples.remove( (BBB,filename) )
                continue
            if firstLine[:1] == BOM:
                logging.info( "USFMBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                firstLine = firstLine[1:] # Remove the Unicode Byte Order Marker (BOM)
            if firstLine[:1] != '\\': # don't allow a blank first line and must start with a backslash
                chosenTuples.remove( (BBB,filename) )

    self.lastTupleList = chosenTuples
    return chosenTuples # No need to sort these, coz all the above calls produce sorted results
def saveAnyChangedGlosses( self, exportAlso=False ):
    """
    If self.haveGlossingDictChanges is set, back up any existing file
        and then pickle self.glossingDict to self.glossingDictFilepath.

    If exportAlso is set, self.exportGlossingDictionary() is called after the save.

    Does nothing (other than the optional debug print) if there are no changes.
    """
    if debuggingThisModule: print( "saveAnyChangedGlosses()" )

    if not self.haveGlossingDictChanges:
        return # Nothing to save

    BibleOrgSysGlobals.backupAnyExistingFile( self.glossingDictFilepath, 9 )

    # How much we say depends on the verbosity
    numLoaded, numNow = self.loadedGlossEntryCount, len(self.glossingDict)
    if BibleOrgSysGlobals.verbosityLevel > 2 or debuggingThisModule:
        print( " Saving Hebrew glossing dictionary ({}->{} entries) to '{}'…".format( numLoaded, numNow, self.glossingDictFilepath ) )
    elif BibleOrgSysGlobals.verbosityLevel > 1:
        print( " Saving Hebrew glossing dictionary ({}->{} entries)…".format( numLoaded, numNow ) )

    with open( self.glossingDictFilepath, 'wb' ) as outputFile:
        pickle.dump( self.glossingDict, outputFile )

    if exportAlso:
        self.exportGlossingDictionary()
def load( self ):
    """
    Load a single source XML file and load book elements.

    Parses self.sourceFilepath into self.tree. If the root element is the expected
        Haggai one, the root attributes are processed (saving any name, status,
        revision and version onto self), any INFORMATION header (the first
        or else the last subelement) is removed from the tree and validated,
        and then each book element is validated and extracted.
    Any other root element is logged as an error.

    self.doPostLoadProcessing() is always called at the end.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
    self.tree = ElementTree().parse( self.sourceFilepath )
    if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

    # Find the main (bible) container
    if self.tree.tag == HaggaiXMLBible.treeTag:
        location = "Haggai XML file"
        BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

        # These attributes are expected (so no warning is logged) but their values aren't used
        #   (In italian.xml lgid is set to "german")
        recognisedButUnused = ( HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation', "lgid", "type" )
        name = status = revision = version = None
        for attrib,value in self.tree.items():
            if attrib == "biblename": name = value
            elif attrib == "status": status = value
            elif attrib == "revision": revision = value
            elif attrib == "version": version = value
            elif attrib in recognisedButUnused: pass
            else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )
        if name: self.name = name
        if status: self.status = status
        if revision: self.revision = revision
        if version: self.version = version

        # Handle the information record at the start or else at the END of the file
        #   (Only look if there's at least one subelement, otherwise the indexing raises an IndexError)
        if len( self.tree ) > 0:
            for candidate in ( self.tree[0], self.tree[-1] ):
                if candidate.tag == 'INFORMATION':
                    self.header = candidate
                    self.tree.remove( self.header )
                    self.__validateAndExtractHeader()
                    break # Only one header is ever processed

        # Find the submain (book) containers
        for element in self.tree:
            if element.tag == HaggaiXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                self.__validateAndExtractBook( element )
            else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) )
    else: logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) )
    self.doPostLoadProcessing()
def __validateAndExtractBook( self, book ):
    """
    Validate the given XML book element, work out which book it is,
        and then extract its captions and chapters into a new BibleBook
        which is stashed into the results.

    The book code comes from the 'bnumber' attribute if there is one
        (a number that can't be looked up is logged and the book is ignored),
        otherwise it's deduced from the 'bname' attribute.
    Nothing is saved if no book code could be determined.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

    # Process the div attributes first
    expectedAttribs = ( "bnumber", "bname", "bsname" )
    found = dict.fromkeys( expectedAttribs )
    for attrib,value in book.items():
        if attrib in found:
            found[attrib] = value
        else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
    bookNumber, bookName, bookShortName = ( found[attribName] for attribName in expectedAttribs )

    BBB = None
    if bookNumber:
        try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        except KeyError:
            logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                    .format( bookNumber, bookName, bookShortName ) )
    elif bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )
    if not BBB:
        return # Don't know what book this is

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'Haggai XML Bible Book object'
    thisBook.objectTypeString = 'Haggai'
    for element in book:
        tag = element.tag
        if tag == HaggaiXMLBible.captionTag:
            sublocation = "caption in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'jhl6' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'jk21' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'kjh6' )
            thisBook.addLine( 'mt', element.text ) # The caption becomes a main title
        elif tag == HaggaiXMLBible.chapterTag:
            sublocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )
        else:
            logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, tag ) )

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
    """
    Check/validate and extract chapter data from the given XML chapter element,
        saving the chapter number and then processing each subelement.

    Parameters:
        BBB: book code -- passed on to the paragraph/verse extractors and used in messages.
        thisBook: book object that receives the lines via addLine( marker, text ).
        chapter: XML chapter element -- its 'cnumber' attribute is the chapter number.

    Paragraph subelements are handed to __validateAndExtractParagraph().
    The direct verse and caption branches only match the tag name with 'disabled'
        added on the end, i.e., they only run for elements with that literal tag.
    Any other subelement is logged as an error.

    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter…") )

    # Process the chapter attributes first
    chapterNumber = None
    for attrib,value in chapter.items():
        if attrib=="cnumber":
            chapterNumber = value
        else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
    if chapterNumber:
        thisBook.addLine( 'c', chapterNumber )
    else: # The attribute that's read above is 'cnumber' (the message used to say 'n')
        logging.error( "Missing 'cnumber' attribute in chapter element for {}".format( BBB ) )

    for element in chapter:
        if element.tag == HaggaiXMLBible.paragraphTag:
            self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element )
        elif element.tag == HaggaiXMLBible.verseTag+'disabled':
            self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element )
        elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms
            location = "caption in {} {}".format( BBB, chapterNumber )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' )

            # Handle caption attributes
            vRef = None
            for attrib,value in element.items():
                if attrib=="vref":
                    vRef = value
                    if BibleOrgSysGlobals.debugFlag: assert vRef == '1'
                else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
            if BibleOrgSysGlobals.debugFlag: assert vRef

            vText = element.text
            if vText: # This is the main text of the caption
                thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero
            else:
                logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
        else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) )
def save( self ):
    """
    Write the program settings out to self.settingsFilepath
        (after backing up any existing file).

    The settings must already be in self.data, which writes itself to the file
        after two comment lines: a heading, and when and where the file was saved.
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("ApplicationSettings.save() in {!r}").format( self.settingsFilepath ) )
    # NOTE(review): the asserts are taken to be unconditional -- confirm against the original layout
    assert self.data
    assert self.settingsFilepath

    BibleOrgSysGlobals.backupAnyExistingFile( self.settingsFilepath, numBackups=8 )
    with open( self.settingsFilepath, 'wt', encoding='utf-8' ) as outputFile: # It may or may not have previously existed
        # Put a (comment) heading in the file first
        headingLine = _("{} {} settings file").format( APP_NAME, SettingsVersion )
        outputFile.write( '# ' + headingLine + '\n' )
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        savedLine = _("Originally saved {} as {}").format( timestamp, self.settingsFilepath )
        outputFile.write( '# ' + savedLine + '\n\n' )

        # Now the settings themselves
        self.data.write( outputFile )
def __validateAndExtractBook( self, book, bookNumber ):
    """
    Check/validate and extract book data from the given XML book record
        finding chapter subelements.

    Parameters:
        book: XML book element -- its 'n' attribute is the book name.
        bookNumber: reference number of the book, used to double-check the book code.

    The book code (BBB) is first looked up from the book name, trying again
        with any accents removed if that finds nothing.
    It's then compared with the code for bookNumber -- if they differ, the number wins.

    Each chapter subelement is extracted into a new BibleBook, which is then stashed.
    Any other subelement is logged as an error.

    Returns None.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

    # Process the div attributes first
    BBB = bookName = None
    for attrib,value in book.items():
        if attrib=="n":
            bookName = value
        else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
    if bookName: # Only try the name lookups if we actually have a name (never pass None to removeAccents)
        BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None:
            adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText( adjustedBookName )
    BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
    if BBB2 != BBB: # Just double check using the book number
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
        BBB = BBB2

    if BBB:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'VerseView XML Bible Book object'
        thisBook.objectTypeString = 'VerseView'
        for element in book:
            if element.tag == VerseViewXMLBible.chapterTag:
                sublocation = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, element )
            else: logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
def __validateAndExtractBook( self, book ):
    """
    Validate the given XML book element, work out which book it is from its name,
        and then extract its chapters into a new BibleBook
        which is stashed into the results.

    The name (the 'n' attribute) is first looked up with self.genericBOS.
    If that fails, the module-level BibleBooksNames is used instead
        (being loaded first if it's still None).
    An error is logged (and nothing is saved) if there's no name
        or if the name isn't recognized.

    The new book is started off with 'id', 'h' and 'mt1' lines.
    """
    global BibleBooksNames

    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book…") )

    # Process the div attributes first
    bookName = None
    for attrib,value in book.items():
        if attrib != "n":
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
            continue
        bookName = value
    if not bookName:
        logging.error( _("OpenSong load can't find a book name") )
        return

    BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
    if not BBB: # wasn't English
        if BibleBooksNames is None:
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        BBB = BibleBooksNames.getBBBFromText( bookName ) # Try non-English booknames
    if not BBB:
        logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
        return

    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'OpenSong XML Bible Book object'
    thisBook.objectTypeString = 'OpenSong'

    # Start the book off with the header lines
    USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
    idText = '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion )
    for headerMarker, headerText in ( ('id',idText), ('h',bookName), ('mt1',bookName) ):
        thisBook.addLine( headerMarker, headerText )

    for element in book:
        if element.tag != OpenSongXMLBible.chapterTag:
            logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, element.tag ) )
            continue
        sublocation = "chapter in {}".format( BBB )
        BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
        BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
        self.__validateAndExtractChapter( BBB, thisBook, element )

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def loadFigure( self, element, location ):
    """
    Append a figure element to the last line of self.thisBook as a \\fig ... \\fig* field.

    Each subelement's tag must be one of the seven figure field names
        (this is asserted) and its cleaned text becomes the value of that field.
    The seven fields (empty if not given) are written in a fixed order
        separated by vertical bars.
    The closing marker is followed by a space and the element's tail if it has one.
    """
    BibleOrgSysGlobals.checkXMLNoText( element, location, 'ff36' )
    BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'cf35' )

    fieldOrder = ( 'description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference', )
    fieldValues = dict.fromkeys( fieldOrder, '' )
    for subelement in element:
        sublocation = subelement.tag + " of " + location
        fieldName = subelement.tag
        fieldText = clean(subelement.text)
        assert( fieldName in fieldValues )
        fieldValues[fieldName] = fieldText if fieldText is not None else ''
        BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
        BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )

    joinedFields = '|'.join( fieldValues[fieldName] for fieldName in fieldOrder )
    figTail = clean( element.tail )
    tailText = (' '+figTail) if figTail else ''
    self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( joinedFields, tailText ) )
def segmentizeLine( line, segmentEndPunctuation='.?!;:' ):
    """
    Break the line into segments (like sentences that should match across the translations)
        and then break each segment into words.

    If you want case folding, convert line to lowerCase before calling.

    Set segmentEndPunctuation to None if you don't want the lines further divided.

    Em-dashes and en-dashes are treated as word breaks. Each word has the internal
        SFM markers removed and its punctuation stripped, and one leading
        non-alphanumeric character is dropped from words longer than one character.
    Words consisting only of digits and the characters ':-,.' (i.e., references
        or numbers) are left out of the result.

    Returns a list of lists of words (one inner list per segment, possibly empty).
    """
    # NOTE: This is a function (not a method) so there's no self.BBB, C, or V available
    #   for the debug messages -- referring to them would raise a NameError
    debugging = BibleOrgSysGlobals.debugFlag and debuggingThisModule
    if debugging: print( exp("segmentizeLine( {!r} )").format( line ) )

    if segmentEndPunctuation:
        for segmentEndChar in segmentEndPunctuation:
            line = line.replace( segmentEndChar, 'SsSsSsS' )
    line = line.replace('—',' ').replace('–',' ') # Treat em-dash and en-dash as word break characters

    lineList = []
    for segment in line.split( 'SsSsSsS' ):
        segmentList = []
        for rawWord in segment.split():
            word = rawWord
            for internalMarker in BibleOrgSysGlobals.internal_SFMs_to_remove: word = word.replace( internalMarker, '' )
            word = BibleOrgSysGlobals.stripWordPunctuation( word )
            if word and not word[0].isalnum():
                if len(word) > 1:
                    if debugging:
                        print( "segmentizeLine: " + _("Have unexpected character starting word {!r}").format( word ) )
                    word = word[1:]
            if not word: continue # There's nothing remaining after all that stripping

            if debugging: # Report any strange characters (this has no effect on the result)
                lastIndex = len(word) - 1
                for k,char in enumerate(word):
                    if not char.isalnum() and (k==0 or k==lastIndex or char not in BibleOrgSysGlobals.MEDIAL_WORD_PUNCT_CHARS):
                        print( "segmentizeLine: " + _("Have unexpected {!r} in word {!r}").format( char, word ) )

            isAReferenceOrNumber = all( char.isdigit() or char in ':-,.' for char in word )
            if not isAReferenceOrNumber:
                segmentList.append( word )
        lineList.append( segmentList )
    return lineList
def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ):
    """
    Validate the given XML paragraph element and extract its contents into thisBook.

    An empty 'p' line is saved first, and then each verse subelement
        is handed to __validateAndExtractVerse().
    The caption branch only matches the caption tag name with 'disabled' added
        on the end -- the text of such an element is saved as verse zero.
    Any other subelement is logged as an error.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph...") )

    paragraphLocation = "paragraph in {} {}".format( BBB, chapterNumber )
    BibleOrgSysGlobals.checkXMLNoAttributes( paragraph, paragraphLocation, 'brgw3' )
    BibleOrgSysGlobals.checkXMLNoText( paragraph, paragraphLocation, 'brgw3' )
    BibleOrgSysGlobals.checkXMLNoTail( paragraph, paragraphLocation, 'brgw3' )
    thisBook.addLine( 'p', '' )

    disabledCaptionTag = HaggaiXMLBible.captionTag+'disabled' # Used in Psalms

    # Handle the subelements (verses)
    for element in paragraph:
        tag = element.tag
        if tag == HaggaiXMLBible.verseTag:
            self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element )
            continue
        if tag != disabledCaptionTag:
            logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, tag ) )
            continue

        # It must be a caption
        captionLocation = "caption in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoTail( element, captionLocation, 'k5k8' )
        BibleOrgSysGlobals.checkXMLNoSubelements( element, captionLocation, 'd3f5' )

        # Handle caption attributes
        vRef = None
        for attrib,value in element.items():
            if attrib != "vref":
                logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                continue
            vRef = value
            if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' )
        if BibleOrgSysGlobals.debugFlag: assert( vRef )

        captionText = element.text
        if captionText: # This is the main text of the caption
            thisBook.addLine( 'v', '0' + ' ' + captionText ) # We save it as verse zero
        else:
            logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
def exportGlossingDictionary( self, glossingDictExportFilepath=None ):
    """
    Export the glossing dictionary to a text file
        plus a reversed text file (without the references).

    Also does a few checks while exporting.
        (These can be fixed and then the file can be imported.)

    Parameters:
        glossingDictExportFilepath: where to write the main export
            (defaults to DEFAULT_GLOSSING_EXPORT_FILEPATH).
            The reverse export always goes to DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH
            and is only written if the dictionary isn't empty.

    Any existing files are backed up first. Problems with entries are only logged
        -- every entry is still written.

    Returns None.
    """
    if glossingDictExportFilepath is None: glossingDictExportFilepath = DEFAULT_GLOSSING_EXPORT_FILEPATH
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( _("Exporting glossing dictionary ({} entries) to '{}'…").format( len(self.glossingDict), glossingDictExportFilepath ) )

    BibleOrgSysGlobals.backupAnyExistingFile( glossingDictExportFilepath, 5 )
    # The encoding is given explicitly coz the words are Hebrew
    #   and the platform default encoding isn't always able to represent them
    # NOTE(review): whatever imports this file should read it as UTF-8 also -- confirm
    with open( glossingDictExportFilepath, 'wt', encoding='utf-8' ) as exportFile:
        for word,(genericGloss,genericReferencesList,specificReferencesDict) in self.glossingDict.items():
            if ' ' in word or '/' in word:
                logging.error( _("Word {!r} has illegal characters").format( word ) )
            if ' ' in genericGloss:
                logging.error( _("Generic gloss {!r} for {!r} has illegal characters").format( genericGloss, word ) )
            if word.count('=') != genericGloss.count('='): # Morphemes are separated by equals signs
                logging.error( _("Generic gloss {!r} and word {!r} has different numbers of morphemes").format( genericGloss, word ) )
            if not genericReferencesList:
                logging.error( _("Generic gloss {!r} for {!r} has no references").format( genericGloss, word ) )
            exportFile.write( '{} {} {} {}\n'.format( genericReferencesList, specificReferencesDict, genericGloss, word ) ) # Works best in editors with English on the left, Hebrew on the right

    if self.glossingDict:
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("Exporting reverse glossing dictionary ({} entries) to '{}'…").format( len(self.glossingDict), DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH ) )
        BibleOrgSysGlobals.backupAnyExistingFile( DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH, 5 )
        doneGlosses = set() # A set so that each duplicate check doesn't have to search through a list
        with open( DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH, 'wt', encoding='utf-8' ) as exportFile:
            # Sorted by the (lower-cased) generic gloss
            for word,(genericGloss,genericReferencesList,specificReferencesDict) in sorted( self.glossingDict.items(), key=lambda theTuple: theTuple[1][0].lower() ):
                if genericGloss in doneGlosses:
                    logging.warning( _("Generic gloss {!r} has already appeared: currently for word {!r}").format( genericGloss, word ) )
                exportFile.write( '{} {}\n'.format( genericGloss, word ) ) # Works best in editors with English on the left, Hebrew on the right
                doneGlosses.add( genericGloss )
def loadCrossreference( self, element, location ):
    """
    Append a cross-reference element to the last line of self.thisBook
        as a \\x ... \\x* field.

    Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

    The opening marker is written with the 'caller' attribute value, then each
        subelement is processed (see the comments below), and finally the closing
        marker is written, followed by a space and the element's tail if it has one.

    NOTE(review): The statement nesting below was reconstructed from a copy of the code
        that had lost its layout -- confirm against the original file.
    NOTE(review): 'halt' isn't defined in the visible code -- presumably it's a deliberate
        NameError to stop the program on unexpected input -- confirm.
    """
    text, tail = clean(element.text), clean(element.tail) # text isn't used again below
    caller = None
    for attrib,value in element.items():
        if attrib == 'caller':
            caller = value
        else:
            logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
    self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) ) # caller is still None here if there was no such attribute
    for subelement in element:
        sublocation = subelement.tag + " of " + location
        marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
        if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','xo','xt',) )
        if marker=='ref': # A reference directly inside the cross-reference
            assert( xText )
            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
            target = None
            for attrib,value in subelement.items():
                if attrib == 'tgt': target = value
                else:
                    logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
            if target: # Written as an opening marker, the target, a closing marker, then the text
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
            else: halt
        else: # Some other subelement: write the opening marker and its text
            BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
            self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
            if marker[0] == 'x': # Starts with x, e.g., xo, xt
                for sub2element in subelement: # Only ref sub-subelements are accepted here
                    sub2location = sub2element.tag + " of " + sublocation
                    marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                    if marker2=='ref':
                        if xText2:
                            self.thisBook.appendToLastLine( xText2 )
                        target = None
                        for attrib,value in sub2element.items():
                            if attrib == 'tgt': target = value
                            else:
                                logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                        if target: self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) ) # No closing marker is written for this one
                        else: halt
                    else: halt
                    if xTail2: self.thisBook.appendToLastLine( xTail2 )
            else: halt
        # NOTE(review): The closing marker for the subelement is only written if the subelement has a tail -- confirm that's intended
        if xTail:
            self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
    self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
def validateEntries( self, segment ):
    """
    Validate the given container of Strongs lexicon entries
        (checked for having no text, tail, or attributes).

    self.StrongsEntries is reset to an empty dictionary,
        and then each "entry" subelement is handed to self.validateEntry().
    Any other subelements are skipped without comment.
    """
    segmentTag = segment.tag
    if BibleOrgSysGlobals.debugFlag: assert segmentTag == "entries"

    BibleOrgSysGlobals.checkXMLNoText( segment, segmentTag, "kw99" )
    BibleOrgSysGlobals.checkXMLNoTail( segment, segmentTag, "ls90" )
    BibleOrgSysGlobals.checkXMLNoAttributes( segment, segmentTag, "hsj2" )

    self.StrongsEntries = {}
    for entryElement in segment:
        if entryElement.tag != "entry":
            continue
        self.validateEntry( entryElement )
def testMySwB( indexString, MySwBfolder, MySwBfilename ):
    """
    Crudely demonstrate the MySword Bible class.

    Creates a MySwordBible for the given folder and filename, preloads it,
        and then (depending on the verbosity) prints a summary
        and the text of a few sample verses.

    The export and round-trip comparison at the end is switched off.
    """
    import VerseReferences

    verbosity = BibleOrgSysGlobals.verbosityLevel
    if verbosity > 1: print( _("Demonstrating the MySword Bible class {}…").format( indexString) )
    if verbosity > 0: print( " Test folder is {!r} {!r}".format( MySwBfolder, MySwBfilename ) )
    MySwB = MySwordBible( MySwBfolder, MySwBfilename )
    MySwB.preload() # (The full load is not done here)
    if BibleOrgSysGlobals.verbosityLevel > 1: print( MySwB ) # Just print a summary

    sampleReferences = ( ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'),
                        ('OT','DAN','1','21'), ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'),
                        ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1',), )
    compareRoundTrip = 0 # Set to 1 to export the Bible and compare the round trip

    if MySwB is not None:
        if BibleOrgSysGlobals.strictCheckingFlag: MySwB.check()
        for reference in sampleReferences:
            t, b, c, v = reference
            if t=='OT' and len(MySwB)==27: continue # Don't bother with OT references if it's only a NT
            if t=='NT' and len(MySwB)==39: continue # Don't bother with NT references if it's only a OT
            if t=='DC' and len(MySwB)<=66: continue # Don't bother with DC references if it's too small
            svk = VerseReferences.SimpleVerseKey( b, c, v )
            try:
                shortText = svk.getShortText()
                verseText = MySwB.getVerseText( svk )
                if BibleOrgSysGlobals.verbosityLevel > 1: print( reference, shortText, verseText )
            except KeyError:
                if BibleOrgSysGlobals.verbosityLevel > 1: print( reference, "not found!!!" )

        if compareRoundTrip: # Now export the Bible and compare the round trip
            MySwB.toMySword()
            if BibleOrgSysGlobals.strictCheckingFlag: # Now compare the original and the derived files
                outputFolder = "OutputFiles/BOS_MySword_Reexport/"
                if BibleOrgSysGlobals.verbosityLevel > 1: print( "\nComparing original and re-exported MySword files…" )
                result = BibleOrgSysGlobals.fileCompare( MySwBfilename, MySwBfilename, MySwBfolder, outputFolder )
                if BibleOrgSysGlobals.debugFlag and not result:
                    halt
def _validate( self ):
    """
    Check/validate the loaded data.

    Every record in self._XMLtree is expected to be a self._mainElementTag element
        (anything else is logged as a warning) with no text, tail, or subelements.
    For each record, the following problems are logged (nothing is changed or raised):
        * compulsory attributes that are missing (error) or blank (warning, except for "type")
        * optional attributes that are present but blank (warning)
        * attributes that are neither compulsory nor optional (warning)
        * values of the self._uniqueAttributes that have already been seen in that
            same attribute in an earlier record (error, except for "reference_name")

    Asserts that the tree has been loaded and isn't empty.
    """
    # (The length is tested explicitly coz testing the truth value of an XML element is deprecated)
    assert self._XMLtree is not None and len( self._XMLtree )

    # Sets so that each uniqueness check doesn't have to search through a list
    uniqueDict = {}
    for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = set()

    for j,element in enumerate(self._XMLtree):
        if element.tag == self._mainElementTag:
            BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None:
                    logging.error( "Compulsory {!r} attribute is missing from {} element in record {}".format( attributeName, element.tag, j ) )
                elif not attributeValue and attributeName!="type": # elif so that a missing attribute isn't also reported as blank
                    logging.warning( "Compulsory {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None and not attributeValue:
                    logging.warning( "Optional {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get( attributeName )
                if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                    logging.warning( "Additional {!r} attribute ({!r}) found on {} element in record {}".format( attributeName, attributeValue, element.tag, j ) )

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None and attributeName!="reference_name":
                    seenValues = uniqueDict["Attribute_"+attributeName]
                    if attributeValue in seenValues:
                        logging.error( "Found {!r} data repeated in {!r} field on {} element in record {}".format( attributeValue, attributeName, element.tag, j ) )
                    seenValues.add( attributeValue )
        else:
            logging.warning( "Unexpected element: {} in record {}".format( element.tag, j ) )
def load( self ):
    """
    Parse the XML file at self.sourceFilepath and process its book elements.

    The parsed root is stored in self.tree.  If the root has the expected
    OpenSongXMLBible.treeTag, its attributes are scanned and every child
    with OpenSongXMLBible.bookTag is handed to __validateAndExtractBook();
    'OT' and 'NT' children are silently accepted and any other child is
    logged as an error.  self.doPostLoadProcessing() is always called last.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( _("Loading {}...").format( self.sourceFilepath ) )

    self.tree = ElementTree().parse( self.sourceFilepath )
    if BibleOrgSysGlobals.debugFlag:
        assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

    root = self.tree
    if root.tag != OpenSongXMLBible.treeTag:
        logging.error( "Expected to load {!r} but got {!r}".format( OpenSongXMLBible.treeTag, root.tag ) )
    else:
        # We have the main (bible) container
        location = "XML file"
        BibleOrgSysGlobals.checkXMLNoText( root, location, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( root, location, '1wk8' )

        # Only the "n" and "sn" attributes are recognised on the main element
        bibleName = bibleShortName = None
        for attribName, attribValue in root.items():
            if attribName == "n":
                bibleName = attribValue
            elif attribName == "sn":
                bibleShortName = attribValue
            else:
                logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attribName, attribValue ) )

        # Now work through the submain (book) containers
        bookLocation = "book in " + location
        for child in root:
            childTag = child.tag
            if childTag == OpenSongXMLBible.bookTag:
                BibleOrgSysGlobals.checkXMLNoText( child, bookLocation, 'g3g5' )
                BibleOrgSysGlobals.checkXMLNoTail( child, bookLocation, 'd3f6' )
                self.__validateAndExtractBook( child )
            elif childTag in ( 'OT', 'NT', ):
                pass # Testament markers are accepted but not processed
            else:
                logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.bookTag, childTag ) )

    self.doPostLoadProcessing()
def testeSwB( eSwBfolder, eSwBfilename ):
    """
    Crudely demonstrate the e-Sword Bible class.

    Creates an ESwordBible from the given folder and filename, loads it,
    and (depending on BibleOrgSysGlobals.verbosityLevel) prints a summary.

    The verse look-ups and the export/round-trip comparison below are
    guarded by `if 0 and eSwB:` and so never run as written.
    NOTE(review): the block layout was reconstructed from a flattened source;
    the export section is assumed to sit inside that disabled guard -- confirm.
    """
    # Crudely demonstrate the e-Sword Bible class
    import VerseReferences
    #testFolder = "../../../../../Data/Work/Bibles/e-Sword modules/" # Must be the same as below

    #TUBfolder = os.path.join( eSwBfolder, eSwBfilename )
    if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Demonstrating the e-Sword Bible class...") )
    if BibleOrgSysGlobals.verbosityLevel > 0: print( " Test folder is {} {}".format( repr(eSwBfolder), repr(eSwBfilename) ) )
    eSwB = ESwordBible( eSwBfolder, eSwBfilename )
    eSwB.load() # Load and process the file
    if BibleOrgSysGlobals.verbosityLevel > 1: print( eSwB ) # Just print a summary
    #print( eSwB.settingsDict )
    if 0 and eSwB: # Permanently disabled by the literal 0
        if BibleOrgSysGlobals.strictCheckingFlag: eSwB.check()
        # Each reference is (testament, book code, chapter, verse)
        for reference in ( ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'), \
                            ('OT','DAN','1','21'), ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'), \
                            ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1',), ):
            (t, b, c, v) = reference
            if t=='OT' and len(eSwB)==27: continue # Don't bother with OT references if it's only a NT
            if t=='NT' and len(eSwB)==39: continue # Don't bother with NT references if it's only a OT
            if t=='DC' and len(eSwB)<=66: continue # Don't bother with DC references if it's too small
            svk = VerseReferences.SimpleVerseKey( b, c, v )
            #print( svk, ob.getVerseDataList( reference ) )
            shortText, verseText = svk.getShortText(), eSwB.getVerseText( svk )
            if BibleOrgSysGlobals.verbosityLevel > 1: print( reference, shortText, verseText )

        # Now export the Bible and compare the round trip
        eSwB.toESword()
        doaResults = eSwB.doAllExports( wantPhotoBible=False, wantODFs=False, wantPDFs=False )
        if BibleOrgSysGlobals.strictCheckingFlag: # Now compare the original and the derived USX XML files
            outputFolder = "OutputFiles/BOS_e-Sword_Reexport/"
            if BibleOrgSysGlobals.verbosityLevel > 1: print( "\nComparing original and re-exported e-Sword files..." )
            result = BibleOrgSysGlobals.fileCompare( eSwBfilename, eSwBfilename, eSwBfolder, outputFolder )
            if BibleOrgSysGlobals.debugFlag:
                # NOTE(review): `halt` is not defined in the visible code -- presumably a
                #   deliberate NameError to stop a debug run when the files differ; confirm
                if not result: halt
#print( "geometry", geometryMap ) #for something in geometryMap: #print( repr(something) ) settings = ApplicationSettings('BiblelatorData/', 'BiblelatorSettings/', ProgName) settings.load() print(str(settings)) print(repr(settings)) #tkRootWindow.destroy() # Useful if we want to measure the start-up time # Start the program running tkRootWindow.mainloop() # end of ApplicationSettings.demo if __name__ == '__main__': from multiprocessing import freeze_support freeze_support() # Multiprocessing support for frozen Windows executables # Configure basic set-up parser = BibleOrgSysGlobals.setup(ProgName, ProgVersion) BibleOrgSysGlobals.addStandardOptionsAndProcess(parser) demo() BibleOrgSysGlobals.closedown(ProgName, ProgVersion) # end of Settings.py
""" if BibleOrgSysGlobals.verbosityLevel > 1: print(ProgNameVersion) if BibleOrgSysGlobals.commandLineOptions.export: brlc = BibleReferencesLinksConverter().loadAndValidate( ) # Load the XML brlc.exportDataWithIndex() # Produce a data file and an index file brlc.pickle() # Produce a pickle output file brlc.exportDataToJSON() # Produce a json output file brlc.exportDataToPython() # Produce the .py tables brlc.exportDataToC() # Produce the .h and .c tables else: # Must be demo mode # Demo the converter object brlc = BibleReferencesLinksConverter().loadAndValidate( ) # Load the XML print(brlc) # Just print a summary # end of demo if __name__ == '__main__': # Configure basic set-up parser = BibleOrgSysGlobals.setup(ProgName, ProgVersion) BibleOrgSysGlobals.addStandardOptionsAndProcess(parser, exportAvailable=True) demo() BibleOrgSysGlobals.closedown(ProgName, ProgVersion) # end of BibleReferencesLinksConverter.py
def __validate( self ):
    """
    Check/validate the loaded data.

    Walks the children of self._XMLtree.  Every child whose tag equals
    self._mainElementTag is checked for:
        * no text or tail (and no attributes / subelements at all when
            none are declared as compulsory or optional)
        * compulsory, optional, unexpected and must-be-unique attributes
        * compulsory, optional, unexpected and must-be-unique subelements
    Children with any other tag are reported as unexpected, and tail data
    after any child or after the tree itself is reported as an error.

    The text of the "sourceComponent" subelement is used as the record ID
    in messages; if that subelement is absent the ID is None (rather than
    this method failing with AttributeError before anything is reported).

    All findings are reported through the logging module; nothing is returned.
    """
    assert self._XMLtree

    # Values already seen for each must-be-unique element/attribute
    # (checked per field -- not across different fields)
    seenElementTexts = { elementName: set() for elementName in self._uniqueElements }
    seenAttributeValues = { attributeName: set() for attributeName in self._uniqueAttributes }

    for j, element in enumerate( self._XMLtree ):
        if element.tag == self._mainElementTag:
            BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
            if not self._compulsoryAttributes and not self._optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
            if not self._compulsoryElements and not self._optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None:
                    logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) )
                elif not attributeValue:
                    # Only a present-but-empty attribute is "blank" -- a missing one
                    #   has already been reported above
                    logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None and not attributeValue:
                    logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                    logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) )

            # Check the attributes that must contain unique information
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None:
                    if attributeValue in seenAttributeValues[attributeName]:
                        logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) )
                    seenAttributeValues[attributeName].add( attributeValue )

            # Get the sourceComponent to use as a record ID
            #   (tolerate its absence so that the checks below can still run)
            sourceComponent = element.find( "sourceComponent" )
            ID = None if sourceComponent is None else sourceComponent.text

            # Check compulsory elements
            for elementName in self._compulsoryElements:
                foundElement = element.find( elementName )
                if foundElement is None:
                    logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})").format( elementName, ID, j ) )
                else:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail( foundElement, sublocation )
                    BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, sublocation )
                    #BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, sublocation )
                    if not foundElement.text:
                        logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

            # Check optional elements
            for elementName in self._optionalElements:
                foundElement = element.find( elementName )
                if foundElement is not None:
                    sublocation = foundElement.tag + " in " + element.tag
                    BibleOrgSysGlobals.checkXMLNoTail( foundElement, sublocation )
                    BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, sublocation )
                    BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, sublocation )
                    if not foundElement.text:
                        logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                    logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})").format( subelement.tag, subelement.text, ID, j ) )

            # Check the elements that must contain unique information
            for elementName in self._uniqueElements:
                foundElement = element.find( elementName )
                if foundElement is not None:
                    text = foundElement.text
                    if text in seenElementTexts[elementName]:
                        logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})").format( text, elementName, ID, j ) )
                    seenElementTexts[elementName].add( text )
        else:
            logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) )

        if element.tail is not None and element.tail.strip():
            logging.error( _("Unexpected {!r} tail data after {} element in record {}").format( element.tail, element.tag, j ) )

    if self._XMLtree.tail is not None and self._XMLtree.tail.strip():
        logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) )
def UnboundBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for Unbound Bible files or folders in the folder and in the next level down.

    A candidate is any file whose name ends with '_utf8.txt'.  When
    strictCheck (or BibleOrgSysGlobals.strictCheckingFlag) is set, the
    first line of the file must also be the Unbound Bible header line;
    files that fail this (or can't be decoded) are skipped -- in the
    given folder and in subfolders alike.

    Returns False if the given folder is unreadable or is not a folder.

    if autoLoad and autoLoadBooks are both false (default) returns None,
        or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one Unbound Bible is found,
        returns the UnboundBible object (loaded only if autoLoadBooks is true).
    """
    UNBOUND_FIRST_LINE = "#THE UNBOUND BIBLE (www.unboundbible.org)"

    def isWantedFile( filename ):
        """ Return False for files with an ignorable name ending or extension. """
        filenameUpper = filename.upper()
        if any( filenameUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
            return False
        extensionUpper = os.path.splitext( filenameUpper )[1]
        return extensionUpper[1:] not in extensionsToIgnore # Compare without the first dot

    def looksLikeUnboundFile( filename, folderName ):
        """ Return True if the file passes the (optional) first-line check. """
        if not ( strictCheck or BibleOrgSysGlobals.strictCheckingFlag ):
            return True
        firstLine = BibleOrgSysGlobals.peekIntoFile( filename, folderName )
        if firstLine is None:
            return False # seems we couldn't decode the file
        if firstLine != UNBOUND_FIRST_LINE:
            if BibleOrgSysGlobals.verbosityLevel > 3:
                print( "UnB (unexpected) first line was {!r} in {}".format( firstLine, filename ) )
            return False
        return True

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( "UnboundBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance( givenFolderName, str )
        assert autoLoad in ( True, False, )

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("UnboundBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("UnboundBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( " UnboundBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders, foundFiles = [], []
    for something in os.listdir( givenFolderName ):
        somepath = os.path.join( givenFolderName, something )
        if os.path.isdir( somepath ):
            if something in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                continue # don't visit these directories
            foundFolders.append( something )
        elif os.path.isfile( somepath ) and isWantedFile( something ):
            foundFiles.append( something )

    # See if there's an UnboundBible project here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted( foundFiles ):
        if thisFilename in ( 'book_names.txt', 'Readme.txt' ):
            looksHopeful = True
        elif thisFilename.endswith( '_utf8.txt' ):
            if not looksLikeUnboundFile( thisFilename, givenFolderName ):
                continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "UnboundBibleFileCheck got", numFound, givenFolderName, lastFilenameFound )
        if numFound == 1 and ( autoLoad or autoLoadBooks ):
            uB = UnboundBible( givenFolderName, lastFilenameFound[:-9] ) # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Looked hopeful but no actual files found" )

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName + '/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("UnboundBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( " UnboundBibleFileCheck: Looking for files in {}".format( tryFolderName ) )
        foundSubfiles = []
        for something in os.listdir( tryFolderName ):
            somepath = os.path.join( givenFolderName, thisFolderName, something )
            if os.path.isfile( somepath ) and isWantedFile( something ):
                foundSubfiles.append( something )

        # See if there's an UB project here in this folder
        for thisFilename in sorted( foundSubfiles ):
            if thisFilename.endswith( '_utf8.txt' ):
                # An unexpected first line just skips the file here (as it does
                #   for the given folder above) rather than aborting the search
                if not looksLikeUnboundFile( thisFilename, tryFolderName ):
                    continue
                foundProjects.append( ( tryFolderName, thisFilename, ) )
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "UnboundBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and ( autoLoad or autoLoadBooks ):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            uB = UnboundBible( foundProjects[0][0], foundProjects[0][1][:-9] ) # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ):
    """
    Validate one XML verse element and save its text into thisBook.

    The verse number is taken from the "n" attribute (any other attribute
    is logged as unprocessed).  The element's own text is stripped, any
    embedded newlines are reported and replaced by spaces, and the result
    is added to thisBook as a 'v' line.  Nothing is added for a verse
    with no text.

    Subelements are not processed here: the element is passed to
    checkXMLNoSubelements, and earlier note/style/line-break handling
    has been disabled in this method.
    """
    traceWanted = BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3
    if traceWanted:
        print( _("Validating XML verse…") )

    where = "verse in {} {}".format( BBB, chapterNumber )
    BibleOrgSysGlobals.checkXMLNoSubelements( verse, where, 'sg20' )
    BibleOrgSysGlobals.checkXMLNoTail( verse, where, 'l5ks' )

    # Pick out the verse number and complain about anything else
    verseNumber = None
    for attribName, attribValue in verse.items():
        if attribName != "n":
            logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attribName, attribValue ) )
            continue
        verseNumber = attribValue
    if BibleOrgSysGlobals.debugFlag:
        assert verseNumber

    # This is the main text of the verse (follows the verse milestone)
    rawText = verse.text
    verseText = rawText.strip() if rawText else ''
    if not verseText:
        return

    if '\n' in verseText:
        print( "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, verseText ) )
        verseText = verseText.replace( '\n', ' ' )
    thisBook.addLine( 'v', verseNumber + ' ' + verseText )
def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ):
    """
    Check/validate and extract verse data from the given XML book record
        finding and saving verse elements.

    The verse number comes from the "vnumber" attribute.  The verse text is
    built up in vText from the element's own text plus its subelements:
        * note elements are appended as USFM-style footnotes (\\f ... \\f*)
        * style elements are appended wrapped in a character marker
            chosen from their "fs" attribute
        * break elements flush the text collected so far as a 'v' line
            and then add an 'm' line holding the break's tail text
    Any text remaining at the end is added to thisBook as a 'v' line.

    BBB and chapterNumber are only used in messages.  Nothing is returned.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse…") )

    location = "verse in {} {}".format( BBB, chapterNumber )
    BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' )

    # Handle verse attributes
    verseNumber = toVerseNumber = None # (toVerseNumber is never used below)
    for attrib,value in verse.items():
        if attrib=="vnumber":
            verseNumber = value
        else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
    if BibleOrgSysGlobals.debugFlag: assert verseNumber
    location = "{}:{}".format( location, verseNumber ) # Get a better location description
    #thisBook.addLine( 'v', verseNumber )
    vText = '' if verse.text is None else verse.text
    if vText: vText = vText.strip()
    #if not vText: # This happens if a verse starts immediately with a style or note
        #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

    # Handle verse subelements (notes and styled portions)
    for subelement in verse:
        if subelement.tag == HaggaiXMLBible.noteTag:
            sublocation = "note in " + location
            noteType = None
            for attrib,value in subelement.items():
                if attrib=="type": noteType = value
                else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
            if noteType and noteType not in ('variant',):
                logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
            nText, nTail = subelement.text, subelement.tail
            #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
            # The note type (if any) goes into a \fk field ahead of the note text
            vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText )
            if nTail:
                if '\n' in nTail:
                    print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
                    nTail = nTail.replace( '\n', ' ' )
                vText += nTail
            # Styled portions nested inside the note
            # NOTE(review): these are appended after the closing \f* and the note's tail -- confirm that is intended
            for subsubelement in subelement:
                if subsubelement.tag == HaggaiXMLBible.styleTag:
                    subsublocation = "style in " + sublocation
                    BibleOrgSysGlobals.checkXMLNoSubelements( subsubelement, subsublocation, 'fyt4' )
                    fs = css = idStyle = None # css and idStyle stay None (their branches are commented out)
                    for attrib,value in subsubelement.items():
                        if attrib=='fs': fs = value
                        #elif attrib=="css": css = value
                        #elif attrib=="id": idStyle = value
                        else: logging.warning( "Unprocessed {!r} attribute ({}) in style subsubelement".format( attrib, value ) )
                    if BibleOrgSysGlobals.debugFlag: assert fs or css or idStyle
                    SFM = None
                    if fs == 'italic': SFM = '\\it'
                    elif fs == 'super': SFM = '\\bdit'
                    elif fs == 'emphasis': SFM = '\\em'
                    # NOTE(review): `halt` is not defined in the visible code -- presumably a deliberate
                    #   NameError to stop on an unknown style; if so the '\\sc' fallback below is never reached
                    else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
                    #if css == "font-style:italic": SFM = '\\it'
                    #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
                    #elif css == "color:#FF0000": SFM = '\\em'
                    #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
                    #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
                    #else: print( "css is", css, "idStyle is", idStyle ); halt
                    # NOTE(review): .strip() will fail if the style element has no text (text is None)
                    sText, sTail = subsubelement.text.strip(), subsubelement.tail
                    if BibleOrgSysGlobals.debugFlag: assert sText
                    if SFM: vText += SFM+' ' + sText + SFM+'*'
                    else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
                    if sTail: vText += sTail.strip()
                else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) )

        elif subelement.tag == HaggaiXMLBible.styleTag:
            sublocation = "style in " + location
            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' )
            fs = css = idStyle = None # css and idStyle stay None (their branches are commented out)
            for attrib,value in subelement.items():
                if attrib=="fs": fs = value
                #elif attrib=="css": css = value
                #elif attrib=="id": idStyle = value
                else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
            if BibleOrgSysGlobals.debugFlag: assert fs
            SFM = None
            # (Unlike the nested case above, 'italic' is not recognised here)
            if fs == 'super': SFM = '\\bdit'
            elif fs == 'emphasis': SFM = '\\em'
            # NOTE(review): `halt` is not defined in the visible code -- see the note on the nested style case
            else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
            #if css == "font-style:italic": SFM = '\\it'
            #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
            #elif css == "color:#FF0000": SFM = '\\em'
            #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
            #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
            #else: print( "css is", css, "idStyle is", idStyle ); halt
            sText, sTail = subelement.text.strip(), subelement.tail
            if BibleOrgSysGlobals.debugFlag: assert sText
            #print( BBB, chapterNumber, sublocation )
            if SFM: vText += SFM+' ' + sText + SFM+'*'
            else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
            if sTail: vText += sTail.strip()

        elif subelement.tag == HaggaiXMLBible.breakTag:
            sublocation = "line break in " + location
            BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
            art = None
            for attrib,value in subelement.items():
                if attrib=="art":
                    art = value
                else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
            if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl'
            #print( BBB, chapterNumber, verseNumber )
            #assert vText
            # Flush what we have so far as the verse line, then start a new 'm' line with the break's tail
            # NOTE(review): verseNumber is None after this flush, so any later
            #   `verseNumber + ' ' + vText` in the same verse would raise TypeError -- confirm
            if vText:
                thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
                vText = ''
            thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' )
            #bTail = subelement.tail
            #if bTail: vText = bTail.strip()
        else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

    if vText: # This is the main text of the verse (follows the verse milestone)
        if '\n' in vText:
            print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
            vText = vText.replace( '\n', ' ' )
        thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
def load( self, filename, folder=None, encoding='utf-8' ):
    """
    Load a single source USX XML file and extract the information.

    The file is parsed with ElementTree and each top-level 'book', 'chapter'
        and 'para' element is converted into lines that are added to this
        object with addLine() / appendToLastLine().

    filename is the name of the USX file and folder (optional) is the folder
        that contains it.
    encoding is only echoed in the debug output here -- it is not passed
        to the XML parser.

    Parse problems and unexpected markers are logged, recorded with
        addPriorityError(), and collected into self.errorDictionary['Load Errors'].
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("load( {}, {}, {} )").format( filename, folder, encoding ) )


    def loadParagraph( paragraphXML, paragraphlocation ):
        """
        Load a paragraph from the USX XML.
            In this context, paragraph means heading and intro lines,
            as well as paragraphs of verses.

        Uses (and updates) C,V information from the containing function.
        """
        nonlocal C, V

        # Process the attributes first
        paragraphStyle = None
        for attrib,value in paragraphXML.items():
            if attrib=='style':
                paragraphStyle = value # This is basically the USFM marker name
            else:
                # NOTE: must use paragraphlocation here -- 'location' is a local name
                #   that isn't assigned until the subelement loop below
                logging.warning( _("CH46 Unprocessed {} attribute ({}) in {}").format( attrib, value, paragraphlocation ) )

        # Now process the paragraph text (or write a paragraph marker anyway)
        paragraphText = paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else ''
        if version is None: paragraphText = paragraphText.rstrip() # Don't need to strip extra spaces in v2
        self.addLine( paragraphStyle, paragraphText )

        # Now process the paragraph subelements
        for element in paragraphXML:
            location = element.tag + ' ' + paragraphlocation
            if element.tag == 'verse': # milestone (not a container)
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                # Process the attributes first
                verseStyle = altNumber = None
                for attrib,value in element.items():
                    if attrib=='number':
                        V = value
                    elif attrib=='style':
                        verseStyle = value
                    elif attrib=='altnumber':
                        altNumber = value
                    else:
                        logging.error( _("KR60 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if verseStyle != 'v':
                    logging.error( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                altStuff = ' \\va {}\\va*'.format( altNumber ) if altNumber else ''
                self.addLine( verseStyle, V + altStuff + ' ' )
                # Now process the tail (if there's one) which is the verse text
                if element.tail:
                    vText = element.tail
                    if vText[0]=='\n': vText = vText.lstrip() # Paratext puts cross references on a new line
                    if vText:
                        self.appendToLastLine( vText )

            elif element.tag == 'char':
                # Process the attributes first
                charStyle = None
                for attrib,value in element.items():
                    if attrib=='style':
                        charStyle = value # This is basically the USFM character marker name
                        assert not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle )
                    else:
                        logging.error( _("QU52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # element.text is None when the char starts with a nested char -- don't emit the word 'None'
                charLine = "\\{} {} ".format( charStyle, element.text or '' )
                # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                for subelement in element:
                    sublocation = subelement.tag + ' ' + location
                    if subelement.tag == 'char': # milestone (not a container)
                        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                        # Process the attributes first
                        subCharStyle, charClosed = None, True
                        for attrib,value in subelement.items():
                            if attrib=='style':
                                subCharStyle = value
                            elif attrib=='closed':
                                assert value=='false'
                                charClosed = False
                            else:
                                logging.error( _("KS41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        charLine += "\\{} {}".format( subCharStyle, subelement.text or '' )
                        if charClosed: charLine += "\\{}*".format( subCharStyle )
                        charLine += '' if subelement.tail is None else subelement.tail
                    else:
                        logging.error( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                        self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                # A character field must be added to the previous field
                charTail = ''
                if element.tail:
                    charTail = element.tail
                    if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts footnote parts on new lines
                charLine += "\\{}*{}".format( charStyle, charTail )
                self.appendToLastLine( charLine )

            elif element.tag == 'note':
                # Process the attributes first
                noteStyle = noteCaller = None
                for attrib,value in element.items():
                    if attrib=='style':
                        noteStyle = value # This is basically the USFM marker name
                        assert noteStyle in ('x','f',)
                    elif attrib=='caller':
                        noteCaller = value # Usually hyphen or a symbol to be used for the note
                    else:
                        logging.error( _("CY38 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if noteCaller=='' and self.BBB=='NUM' and C=='10' and V=='36': noteCaller = '+' # Hack
                assert noteStyle and noteCaller # both compulsory
                noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                if element.text:
                    noteText = element.text.strip()
                    noteLine += noteText
                # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                for subelement in element:
                    sublocation = subelement.tag + ' ' + location
                    if subelement.tag == 'char': # milestone (not a container)
                        # Process the attributes first
                        charStyle, charClosed = None, True
                        for attrib,value in subelement.items():
                            if attrib=='style':
                                charStyle = value
                            elif attrib=='closed':
                                assert value=='false'
                                charClosed = False
                            else:
                                logging.warning( _("GJ67 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        noteLine += "\\{} {}".format( charStyle, subelement.text or '' )
                        # Now process the (second level) subelements
                        for sub2element in subelement:
                            sub2location = sub2element.tag + ' ' + sublocation
                            if sub2element.tag == 'char': # milestone (not a container)
                                BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location )
                                # Process the attributes first
                                char2Style, char2Closed = None, True
                                for attrib,value in sub2element.items():
                                    if attrib=='style':
                                        char2Style = value
                                    elif attrib=='closed':
                                        assert value=='false'
                                        char2Closed = False
                                    else:
                                        logging.warning( _("VH36 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                                assert char2Closed
                                noteLine += "\\{} {}\\{}*{}".format( char2Style, sub2element.text or '', char2Style, sub2element.tail if sub2element.tail else '' )
                        if charClosed: noteLine += "\\{}*".format( charStyle )
                        if subelement.tail:
                            charTail = subelement.tail
                            if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts cross reference parts on a new line
                            noteLine += charTail
                    elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                        BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                        # Process the attributes first
                        unmmatchedMarker = None
                        for attrib,value in subelement.items():
                            if attrib=='marker':
                                unmmatchedMarker = value
                            else:
                                logging.warning( _("NV21 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        self.addPriorityError( 2, C, V, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                    else:
                        logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                        self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                        # Keep any text that followed the unknown subelement
                        #   (char subelements above already appended their own tails)
                        if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                noteLine += "\\{}*".format( noteStyle )
                if element.tail:
                    noteTail = element.tail
                    if noteTail[0]=='\n': noteTail = noteTail.lstrip() # Paratext puts multiple cross-references on new lines
                    noteLine += noteTail
                self.appendToLastLine( noteLine )

            elif element.tag == 'link': # Used to include extra resources
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoTail( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                # Process the attributes first
                linkStyle = linkDisplay = linkTarget = None
                for attrib,value in element.items():
                    if attrib=='style':
                        linkStyle = value
                        assert linkStyle in ('jmp',)
                    elif attrib=='display':
                        linkDisplay = value # e.g., "click here"
                    elif attrib=='target':
                        linkTarget = value # e.g., some reference
                    else:
                        logging.warning( _("KW54 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.addPriorityError( 3, C, V, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )

            elif element.tag == 'unmatched': # Used to denote errors in the source text
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoTail( element, location )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                self.addPriorityError( 2, C, V, _("Unmatched element in {}").format( location) )

            else:
                logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, location ) )
                self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) )
                # Show the last few lines that were loaded to help locate the problem
                for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] )
                if BibleOrgSysGlobals.debugFlag: halt # deliberate stop (undefined name) when debugging
    # end of loadParagraph


    C = V = '0'
    loadErrors = []
    lastMarker = None

    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( " " + _("Loading {} from {}…").format( filename, folder ) )
    elif BibleOrgSysGlobals.verbosityLevel > 2:
        print( " " + _("Loading {}…").format( filename ) )
    self.isOneChapterBook = self.BBB in BibleOrgSysGlobals.BibleBooksCodes.getSingleChapterBooksList()
    self.sourceFilename = filename
    self.sourceFolder = folder
    self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
    try:
        self.tree = ElementTree().parse( self.sourceFilepath )
    except ParseError as err:
        logging.critical( exp("Loader parse error in xml file {}: {} {}").format( filename, sys.exc_info()[0], err ) )
        loadErrors.append( exp("Loader parse error in xml file {}: {} {}").format( filename, sys.exc_info()[0], err ) )
        self.addPriorityError( 100, C, V, _("Loader parse error in xml file {}: {}").format( filename, err ) )
    if BibleOrgSysGlobals.debugFlag: assert len ( self.tree ) # Fail here if we didn't load anything at all

    # Find the main container
    if 'tree' in dir(self) \
    and ( self.tree.tag=='usx' or self.tree.tag=='usfm' ): # Not sure why both are allowable
        location = "USX ({}) file".format( self.tree.tag )
        BibleOrgSysGlobals.checkXMLNoText( self.tree, location )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, location )

        # Process the attributes first
        self.schemaLocation = ''
        version = None
        for attrib,value in self.tree.items():
            if attrib=='version':
                version = value
            else:
                logging.warning( _("DG84 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        if version not in ( None, '2.0' ):
            logging.warning( _("Not sure if we can handle v{} USX files").format( version ) )

        # Now process the data
        for element in self.tree:
            sublocation = element.tag + " " + location
            if element.tag == 'book': # milestone (not a container)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                # Process the attributes
                idField = bookStyle = None
                for attrib,value in element.items():
                    if attrib=='id' or attrib=='code':
                        idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                    elif attrib=='style':
                        bookStyle = value
                    else:
                        logging.warning( _("MD12 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if bookStyle != 'id':
                    logging.warning( _("Unexpected style attribute ({}) in {}").format( bookStyle, sublocation ) )
                idLine = idField
                if element.text and element.text.strip(): idLine += ' ' + element.text
                self.addLine( 'id', idLine )

            elif element.tag == 'chapter': # milestone (not a container)
                V = '0'
                BibleOrgSysGlobals.checkXMLNoText( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                # Process the attributes
                chapterStyle = pubNumber = None
                for attrib,value in element.items():
                    if attrib=='number':
                        C = value
                    elif attrib=='style':
                        chapterStyle = value
                    elif attrib=='pubnumber':
                        pubNumber = value
                    else:
                        logging.error( _("LY76 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if chapterStyle != 'c':
                    logging.warning( _("Unexpected style attribute ({}) in {}").format( chapterStyle, sublocation ) )
                self.addLine( 'c', C )
                if pubNumber: self.addLine( 'cp', pubNumber )

            elif element.tag == 'para':
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                USFMMarker = element.attrib['style'] # Get the USFM code for the paragraph style
                if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( USFMMarker ):
                    loadParagraph( element, sublocation )
                elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( USFMMarker ): # the line begins with an internal USFM Marker -- append it to the previous line
                    text = element.text
                    if text is None: text = ''
                    if BibleOrgSysGlobals.debugFlag:
                        print( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                    if text:
                        loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                        logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, C, V, text ) )
                    else: # no text
                        loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM Marker at beginning of line (with no text)").format( self.BBB, C, V, USFMMarker ) )
                        logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, C, V ) )
                    self.addPriorityError( 97, C, V, _("Found \\{} internal USFM Marker on new line in file").format( USFMMarker ) )
                    # NOTE(review): lastText is only ever written out by the unknown-marker
                    #   recovery below (when lastMarker is set) -- confirm that this is intended
                    lastText = '\\' + USFMMarker + ' ' + text
                else: # the line begins with an unknown USFM Marker
                    try: status = element.attrib['status']
                    except KeyError: status = None
                    text = element.text
                    if text:
                        loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                        logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, C, V, text ) )
                    else: # no text
                        loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line (with no text").format( self.BBB, C, V, USFMMarker ) )
                        logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, C, V ) )
                    self.addPriorityError( 100, C, V, _("Found \\{} unknown USFM Marker on new line in file").format( USFMMarker ) )
                    if status == 'unknown': # USX exporter already knew it was a bad marker
                        pass # Just drop it completely
                    else:
                        for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                            if USFMMarker.startswith( tryMarker ): # Let's try changing it
                                if lastMarker: self.addLine( lastMarker, lastText )
                                # text is None for an empty para -- don't fail when joining it on
                                lastMarker, lastText = tryMarker, USFMMarker[len(tryMarker):] + ' ' + ( text or '' )
                                loadErrors.append( _("{} {}:{} Changed '\\{}' unknown USFM Marker to {!r} at beginning of line: {}").format( self.BBB, C, V, USFMMarker, tryMarker, text ) )
                                logging.warning( _("Changed '\\{}' unknown USFM Marker to {!r} after {} {}:{} at beginning of line: {}").format( USFMMarker, tryMarker, self.BBB, C, V, text ) )
                                break
                        # Otherwise, don't bother processing this line -- it'll just cause more problems later on
            else:
                logging.error( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, sublocation ) )
                self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) )

    if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
def __validateAndExtractBook(self, book):
    """
    Validate one OpenSong XML book element and extract its data.

    The book name comes from the element's "n" attribute and is converted
        to a BBB book code (trying the generic English names first and then
        the non-English book names systems).
    Each chapter subelement is handed to __validateAndExtractChapter()
        and the finished book is stashed in this Bible object.
    """
    global BibleBooksNames

    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating OpenSong XML book…"))

    # Process the div attributes first
    bookName = None
    for attribName, attribValue in book.items():
        if attribName != "n":
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attribName, attribValue))
            continue
        bookName = attribValue

    if not bookName:  # no bookName
        logging.error(_("OpenSong load can't find a book name"))
        return

    # Booknames are usually in English
    BBB = self.genericBOS.getBBBFromText(bookName)
    if not BBB:  # wasn't English
        if BibleBooksNames is None:
            BibleBooksNames = BibleBooksNamesSystems().loadData()
        # Try non-English booknames
        BBB = BibleBooksNames.getBBBFromText(bookName)

    if not BBB:  # no BBB
        logging.error(
            _("OpenSong load doesn't recognize book name: {!r}").format(
                bookName))
        return

    wantProgress = BibleOrgSysGlobals.verbosityLevel > 2
    if wantProgress:
        print(_("Validating {} {}…").format(BBB, bookName))

    # Create the book object and give it some header lines
    thisBook = BibleBook(self, BBB)
    thisBook.objectNameString = 'OpenSong XML Bible Book object'
    thisBook.objectTypeString = 'OpenSong'
    USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(
        BBB)
    idText = '{} imported by {}'.format(USFMAbbreviation.upper(),
                                        ProgNameVersion)
    thisBook.addLine('id', idText)
    for headerMarker in ('h', 'mt1'):
        thisBook.addLine(headerMarker, bookName)

    # Now process the chapters
    for child in book:
        if child.tag != OpenSongXMLBible.chapterTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.chapterTag, child.tag))
            continue
        sublocation = "chapter in {}".format(BBB)
        BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'j3jd')
        BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'al1d')
        self.__validateAndExtractChapter(BBB, thisBook, child)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Saving {} into results…".format(BBB))
    self.stashBook(thisBook)
def __validateAndExtractChapter(self, BBB, thisBook, chapter):
    """
    Check/validate and extract chapter data from the given XML book record
        finding and saving chapter numbers and
        finding and saving verse elements.

    BBB is the book code (used in log messages and locations),
        thisBook is the book object that receives the lines via addLine(),
        and chapter is the XML chapter element.

    Verse text is built from the verse element's own text plus any
        <i> (italic) subelements, which are written as \\it ...\\it* fields.
    Verse text containing newlines is written as poetry (q1) lines.
    """
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(_("Validating XML chapter…"))

    # Process the div attributes first
    chapterNumber = None
    for attrib, value in chapter.items():
        if attrib == "n":
            chapterNumber = value
        elif attrib == "VERSES":
            pass  # The verse count is recognised but not used
        else:
            logging.warning(
                "Unprocessed {!r} attribute ({}) in chapter element".format(
                    attrib, value))
    if chapterNumber:
        chapterNumber = chapterNumber.replace(
            'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
        thisBook.addLine('c', chapterNumber)
    else:
        logging.error(
            "Missing 'n' attribute in chapter element for {}".format(BBB))

    for element in chapter:
        if element.tag != OpenSongXMLBible.verseTag:
            logging.error("Expected to find {!r} but got {!r}".format(
                OpenSongXMLBible.verseTag, element.tag))
            continue

        sublocation = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
        verseNumber = None
        for attrib, value in element.items():
            if attrib == "n":
                verseNumber = value
            elif attrib == "t":
                pass  # The 'to' verse number is recognised but not used
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag:
            assert verseNumber

        vText = element.text if element.text else ''
        for subelement in element:
            sub2location = "{} in {}".format(subelement.tag, sublocation)
            BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sub2location,
                                                    'ks03')
            BibleOrgSysGlobals.checkXMLNoSubelements(subelement, sub2location,
                                                     'ks05')
            if subelement.tag == 'i':
                # ElementTree gives None for missing text/tail (e.g., an <i>
                #   at the very end of the verse) -- don't write the word
                #   'None' into the verse text
                vText += '\\it {}\\it*{}'.format(subelement.text or '',
                                                 subelement.tail or '')
            else:
                logging.error("Expected to find 'i' but got {!r}".format(
                    subelement.tag))
        vText += element.tail if element.tail else ''

        if not vText:
            logging.warning("{} {}:{} has no text".format(
                BBB, chapterNumber, verseNumber))
            continue

        # This is the main text of the verse (follows the verse milestone)
        if '\n' in vText:  # This is how they represent poetry
            for j, textBit in enumerate(vText.split('\n')):
                if j == 0:
                    thisBook.addLine('q1', '')
                    thisBook.addLine('v', verseNumber + ' ' + textBit)
                else:
                    thisBook.addLine('q1', textBit)
        else:  # Just one verse line
            thisBook.addLine('v', verseNumber + ' ' + vText)
def writeOpenSongBook(writerObject, BBB, bkData):
    """
    Writes a book to the OpenSong XML writerObject.

    The processed lines of bkData are walked in order: chapter markers
        open/close 'c' elements, and verse text is accumulated and written
        as one 'v' element per verse.
    Markers that aren't exported are recorded in the ignoredMarkers
        or unhandledMarkers collections of the enclosing scope.
    """
    if not BibleOrgSysGlobals.BibleBooksCodes.getOSISAbbreviation(BBB):
        logging.warning(
            "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
            .format(BBB))
        unhandledBooks.append(BBB)
        return

    headingMarkers = ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4',
                      'ms1','ms2','ms3','ms4',
                      's1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd',
                      'cl','lit', )
    textMarkers = ('v~', 'p~', )

    writerObject.writeLineOpen('b', ('n', bkData.getAssumedBookNames()[0]))
    chapterIsOpen = False
    seenVerse = False
    pendingVP = None
    verseText = ""
    C = V = '-1'  # So first/id line starts at -1:0
    for entry in bkData._processedLines:  # Process internal Bible data lines
        marker = entry.getMarker()
        text = entry.getCleanText()
        extras = entry.getExtras()

        if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
            continue  # Just ignore added markers -- not needed here
        if marker in USFM_PRECHAPTER_MARKERS:
            if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                assert C == '-1' or marker == 'rem' or marker.startswith('mte')
            V = str(int(V) + 1)

        if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in ('ie', ):
            # Just ignore these lines
            ignoredMarkers.add(marker)

        elif marker == 'c':
            if verseText:  # write out the last verse of the previous chapter
                writerObject.writeLineOpenClose('v', verseText,
                                                ('n', verseLabel))
                verseText = ''
            if chapterIsOpen:
                writerObject.writeLineClose('c')
            C = text
            V = '0'
            writerObject.writeLineOpen('c', ('n', text))
            chapterIsOpen = True

        elif marker in ('c#', ):
            # These are the markers that we can safely ignore for this export
            ignoredMarkers.add(marker)

        elif marker == 'vp#':
            # This precedes a v field and has the verse number to be printed
            pendingVP = text  # Just remember it for now

        elif marker == 'v':
            V = text
            if pendingVP:  # this is the verse number to be published
                text, pendingVP = pendingVP, None
            seenVerse = True
            if verseText:  # write out the previous verse
                writerObject.writeLineOpenClose('v', verseText,
                                                ('n', verseLabel))
                verseText = ''
            if not text:
                logging.warning("createOpenSongXML: Missing text for v")
                continue
            # Used below but remove anything that'll cause a big XML problem later
            verseLabel = text.replace('<', '').replace('>', '').replace('"', '')

        elif marker in headingMarkers \
        or marker in USFM_ALL_INTRODUCTION_MARKERS:
            ignoredMarkers.add(marker)

        elif marker in USFM_BIBLE_PARAGRAPH_MARKERS:
            if BibleOrgSysGlobals.debugFlag:
                assert not text and not extras
            ignoredMarkers.add(marker)

        elif marker in ('b', 'nb', 'ib', ):
            if BibleOrgSysGlobals.debugFlag:
                assert not text and not extras
            ignoredMarkers.add(marker)

        elif marker in textMarkers:
            if BibleOrgSysGlobals.debugFlag:
                assert text or extras
            if not text:  # this is an empty (untranslated) verse
                text = '- - -'  # but we'll put in a filler
            if seenVerse:
                separator = ' ' if verseText else ''
                verseText += separator + BibleOrgSysGlobals.makeSafeXML(text)

        else:
            if text:
                logging.warning(
                    "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                    format(marker, BBB, C, V, text))
            if extras:
                logging.warning(
                    "toOpenSong: lost extras in {} field in {} {}:{}".
                    format(marker, BBB, C, V))
            unhandledMarkers.add(marker)

        if extras and not (marker in textMarkers or marker in ignoredMarkers):
            logging.critical(
                "toOpenSong: extras not handled for {} at {} {}:{}".format(
                    marker, BBB, C, V))

    # Finish off whatever is still open
    if verseText:
        writerObject.writeLineOpenClose('v', verseText, ('n', verseLabel))
    if chapterIsOpen:
        writerObject.writeLineClose('c')
    writerObject.writeLineClose('b')
def loadSystems(self, folder=None):
    """
    Load and pre-process the specified booksNames systems.

    folder is the folder to scan for the XML files
        (defaults to DataFiles/BookNames relative to this module).
    Every .XML file in the folder whose name starts with the filename base
        plus an underscore is parsed, and its tree, language code and
        header details are saved under its system code.
    Nothing is done if systems have already been loaded.

    Returns self (so that calls can be chained).
    """
    if self.__XMLSystems:  # Only ever do this once
        return self

    if folder is None:
        folder = os.path.join(
            os.path.dirname(__file__), "DataFiles",
            "BookNames")  # Relative to module, not cwd
    self.__XMLFolder = folder
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading book names systems from {}…").format(folder))

    filenamePrefix = self.__filenameBase.upper() + "_"
    for filename in os.listdir(folder):
        filepart, extension = os.path.splitext(filename)
        if extension.upper() != '.XML' \
        or not filepart.upper().startswith(filenamePrefix):
            continue  # Not one of our files

        booksNamesSystemCode = filepart[len(self.__filenameBase) + 1:]
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(
                _("Loading {} books names system from {}…").format(
                    booksNamesSystemCode, filename))
        # Register the (same) dictionary straight away and then fill it in
        systemData = self.__XMLSystems[booksNamesSystemCode] = {}
        systemData["languageCode"] = booksNamesSystemCode.split('_', 1)[0]
        tree = systemData['tree'] = ElementTree().parse(
            os.path.join(folder, filename))
        # Fail here if we didn't load anything at all
        #   (explicit len() because truth-testing an Element is deprecated)
        assert len(tree)

        # Check and remove the header element
        if tree.tag == self.XMLTreeTag:
            header = tree[0]
            if header.tag == self.headerTag:
                systemData["header"] = header
                tree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, "header")
                BibleOrgSysGlobals.checkXMLNoTail(header, "header")
                BibleOrgSysGlobals.checkXMLNoAttributes(header, "header")
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        work, "work in header")
                    if work.tag == "work":
                        systemData['version'] = work.find('version').text
                        systemData["date"] = work.find("date").text
                        systemData["title"] = work.find("title").text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                logging.warning(
                    _("Missing header element (looking for {!r} tag)"
                      ).format(self.headerTag))
        else:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self.XMLTreeTag, tree.tag))

        # Whatever is left in the tree (after removing any header) is the books
        bookCount = len(tree)
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                _(" Loaded {} books for {}").format(
                    bookCount, booksNamesSystemCode))
        logging.info(
            _(" Loaded {} books for {}").format(bookCount,
                                                booksNamesSystemCode))

        if BibleOrgSysGlobals.strictCheckingFlag:
            self.__validateSystem(booksNamesSystemCode)
    return self
def createMySwordModule(self, outputFolder, controlDict):
    """
    Create a SQLite3 database module for the program MySword.

    self here is a Bible object with _processedLines

    outputFolder is the folder where the .bbl.mybible file is written.
    controlDict may contain 'MySwordOutputFilename' to set the output
        filename -- otherwise it's derived from the source filename,
        short name, abbreviation or name of this Bible.

    The database gets a Details table (module settings) and a Bible table
        (one row per verse in KJV book/chapter/verse order), and then
        a compressed .gz archive of the database file is also written.

    Returns True.
    """
    import tarfile
    from InternalBibleInternals import BOS_ADDED_NESTING_MARKERS, BOS_NESTING_MARKERS
    from theWordBible import theWordOTBookLines, theWordNTBookLines, theWordBookLines, theWordHandleIntroduction, theWordComposeVerseLine

    def writeMSBook(sqlObject, BBB, ourGlobals):
        """
        Writes a book to the MySword sqlObject file.

        Rows are written one verse behind because paragraph indicators
            get appended to the previous line.
        """
        nonlocal lineCount
        bkData = self.books[BBB] if BBB in self.books else None
        verseList = BOS.getNumVersesList(BBB)
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        numC, numV = len(verseList), verseList[0]

        def writePendingLine():
            """ Write the remembered (previous) verse row. """
            nonlocal lineCount
            bookNum, chapterNum, verseNum = ourGlobals['lastBCV']
            sqlObject.execute(
                'INSERT INTO "Bible" VALUES(?,?,?,?)',
                (bookNum, chapterNum, verseNum, ourGlobals['lastLine']))
            lineCount += 1

        ourGlobals['line'], ourGlobals['lastLine'] = '', None
        for piKey in ('pi1', 'pi2', 'pi3', 'pi4', 'pi5', 'pi6', 'pi7'):
            ourGlobals[piKey] = False
        if bkData:
            # Write book headings (stuff before chapter 1)
            ourGlobals['line'] = theWordHandleIntroduction(
                BBB, bkData, ourGlobals)

        # Write the verses
        C = V = 1
        ourGlobals['lastLine'] = ourGlobals['lastBCV'] = None
        while True:
            verseData = None
            if bkData:
                try:
                    result = bkData.getContextVerseData((BBB, str(C), str(V), ))
                    verseData, context = result
                except KeyError:  # Missing verses
                    logging.warning(
                        "BibleWriter.createMySwordModule: missing source verse at {} {}:{}"
                        .format(BBB, C, V))
                # Handle some common versification anomalies
                #   by adding the text of the following verse (if it exists)
                extraRef = None
                if (BBB, C, V) == ('JN3', 1, 14):
                    extraRef = ('JN3', '1', '15', )
                elif (BBB, C, V) == ('REV', 12, 17):
                    extraRef = ('REV', '12', '18', )
                if extraRef is not None:
                    try:
                        extraVerseData, extraContext = bkData.getContextVerseData(
                            extraRef)
                    except KeyError:
                        pass  # just ignore it
                    else:
                        if verseData is None:
                            # The expected verse itself was missing -- don't fail,
                            #   just use the extra verse on its own
                            verseData = extraVerseData
                        else:
                            verseData.extend(extraVerseData)
            composedLine = ''
            if verseData:
                composedLine = theWordComposeVerseLine(
                    BBB, C, V, verseData, ourGlobals)
            # Stay one line behind (because paragraph indicators get appended to the previous line)
            if ourGlobals['lastBCV'] is not None \
            and ourGlobals['lastLine']:  # don't bother writing blank (unfinished?) verses
                writePendingLine()
            ourGlobals['lastLine'] = composedLine
            ourGlobals['lastBCV'] = (nBBB, C, V)
            V += 1
            if V > numV:
                C += 1
                if C > numC:
                    break
                # next chapter only
                numV = verseList[C - 1]
                V = 1
        # Write the last line of the file
        if ourGlobals['lastLine']:  # don't bother writing blank (unfinished?) verses
            writePendingLine()
    # end of createMySwordModule.writeMSBook

    # Set-up their Bible reference system
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    # Try to figure out if it's an OT/NT or what (allow for up to 4 extra books like FRT,GLS, etc.)
    if len(self) <= (39 + 4) and self.containsAnyOT39Books(
    ) and not self.containsAnyNT27Books():
        testament, startBBB, endBBB = 'OT', 'GEN', 'MAL'
        booksExpected, textLineCountExpected, checkTotals = 39, 23145, theWordOTBookLines
    elif len(self) <= (27 + 4) and self.containsAnyNT27Books(
    ) and not self.containsAnyOT39Books():
        testament, startBBB, endBBB = 'NT', 'MAT', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 27, 7957, theWordNTBookLines
    else:  # assume it's an entire Bible
        testament, startBBB, endBBB = 'BOTH', 'GEN', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 66, 31102, theWordBookLines
    extension = '.bbl.mybible'

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_(" Exporting to MySword format…"))
    mySettings = {}
    mySettings['unhandledMarkers'] = set()
    handledBooks = []

    # Work out the output filename
    if 'MySwordOutputFilename' in controlDict:
        filename = controlDict['MySwordOutputFilename']
    elif self.sourceFilename:
        filename = self.sourceFilename
    elif self.shortName:
        filename = self.shortName
    elif self.abbreviation:
        filename = self.abbreviation
    elif self.name:
        filename = self.name
    else:
        filename = 'export'
    if not filename.endswith(extension):
        filename += extension  # Make sure that we have the right file extension
    filepath = os.path.join(outputFolder,
                            BibleOrgSysGlobals.makeSafeFilename(filename))
    if os.path.exists(filepath): os.remove(filepath)
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(' createMySwordModule: ' + _("Writing {!r}…").format(filepath))

    conn = sqlite3.connect(filepath)
    try:
        cursor = conn.cursor()

        # First write the settings Details table
        exeStr = 'CREATE TABLE Details(Description NVARCHAR(255), Abbreviation NVARCHAR(50), Comments TEXT, Version TEXT, VersionDate DATETIME, PublishDate DATETIME, RightToLeft BOOL, OT BOOL, NT BOOL, Strong BOOL'  # incomplete
        customCSS = self.getSetting('CustomCSS')
        if customCSS: exeStr += ', CustomCSS TEXT'
        exeStr += ')'
        cursor.execute(exeStr)

        values = []

        description = self.getSetting('Description')
        if not description: description = self.getSetting('description')
        if not description: description = self.name
        values.append(description)

        if self.abbreviation: abbreviation = self.abbreviation
        else: abbreviation = self.getSetting('WorkAbbreviation')
        if not abbreviation: abbreviation = self.name[:3].upper()
        values.append(abbreviation)

        for settingName in ('Comments', 'Version', 'VersionDate',
                            'PublishDate', 'RightToLeft'):
            values.append(self.getSetting(settingName))

        values.append(testament in ('OT', 'BOTH'))
        values.append(testament in ('NT', 'BOTH'))

        Strong = self.getSetting('Strong')
        values.append(Strong if Strong else False)

        if customCSS: values.append(customCSS)

        exeStr = 'INSERT INTO "Details" VALUES(' + '?,' * (len(values) - 1) + '?)'
        cursor.execute(exeStr, values)

        # Now create and fill the Bible table
        cursor.execute(
            'CREATE TABLE Bible(Book INT, Chapter INT, Verse INT, Scripture TEXT, Primary Key(Book,Chapter,Verse))'
        )
        conn.commit()  # save (commit) the changes
        BBB, lineCount = startBBB, 0
        while True:  # Write each Bible book in the KJV order
            writeMSBook(cursor, BBB, mySettings)
            conn.commit()  # save (commit) the changes
            handledBooks.append(BBB)
            if BBB == endBBB: break
            BBB = BOS.getNextBookCode(BBB)
        conn.commit()  # save (commit) the changes
        cursor.close()
    finally:
        # Always release the database file (it gets archived below)
        conn.close()

    if mySettings['unhandledMarkers']:
        logging.warning(
            "BibleWriter.createMySwordModule: Unhandled markers were {}".
            format(mySettings['unhandledMarkers']))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " +
                  _("WARNING: Unhandled createMySwordModule markers were {}"
                    ).format(mySettings['unhandledMarkers']))
    unhandledBooks = [BBB for BBB in self.getBookList()
                      if BBB not in handledBooks]
    if unhandledBooks:
        logging.warning("createMySwordModule: Unhandled books were {}".format(
            unhandledBooks))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " +
                  _("WARNING: Unhandled createMySwordModule books were {}"
                    ).format(unhandledBooks))

    # Now create the gzipped file
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Compressing {} MySword file…".format(filename))
    with tarfile.open(filepath + '.gz', 'w:gz') as tar:  # closed even if add() fails
        tar.add(filepath)

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print(" BibleWriter.createMySwordModule finished successfully.")
    return True
def load( self, filename, folder=None, encoding='utf-8' ):
    """
    Load a single source USX XML file and extract the information.

    The file is parsed with ElementTree. If the root element is usx (or usfm),
        each book, chapter and para child is converted into marker/text lines
        using self.addLine() and self.appendToLastLine().
        Any other root element is left unprocessed.

    Parameters:
        filename: name of the USX XML file
        folder: optional folder which is joined with the filename
        encoding: accepted but not used in this function
            (only the filepath is given to the XML parser)

    Sets self.isOneChapterBook, self.sourceFilename, self.sourceFolder,
        self.sourceFilepath, self.tree and (for a usx/usfm root) self.schemaLocation.
    Any load errors are saved in self.errorDictionary['Load Errors'].
    Returns nothing.
    """

    def loadParagraph( paragraphXML, paragraphlocation ):
        """
        Load a paragraph from the USX XML.
        Uses (and updates) c,v information from the containing function.

        paragraphXML is the para element
        paragraphlocation is a description of where we are (used in the log messages).
        """
        nonlocal c, v

        # Process the attributes first
        paragraphStyle = None
        for attrib,value in paragraphXML.items():
            if attrib=='style':
                paragraphStyle = value # This is basically the USFM marker name
            else:
                # NOTE: We must use paragraphlocation here -- 'location' is a local name
                #   which isn't assigned until we get to the subelement loop below
                logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, paragraphlocation ) )

        # Now process the paragraph text (or write a paragraph marker anyway)
        self.addLine( paragraphStyle, paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else '' )

        # Now process the paragraph subelements
        for element in paragraphXML:
            location = element.tag + ' ' + paragraphlocation
            #print( "USXXMLBibleBook.load", c, v, element.tag, location )
            if element.tag == 'verse': # milestone (not a container)
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                # Process the attributes first
                verseStyle = None
                for attrib,value in element.items():
                    if attrib=='number':
                        v = value
                    elif attrib=='style':
                        verseStyle = value
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if verseStyle != 'v':
                    logging.warning( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                self.addLine( verseStyle, v + ' ' )
                # Now process the tail (if there's one) which is the verse text
                if element.tail:
                    vText = element.tail.strip()
                    if vText:
                        #print( repr(vText) )
                        self.appendToLastLine( vText )
            elif element.tag == 'char':
                # Process the attributes first
                charStyle = None
                for attrib,value in element.items():
                    if attrib=='style':
                        charStyle = value # This is basically the USFM character marker name
                        #print( "  charStyle", charStyle )
                        assert( not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle ) )
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # A char with no leading text has text==None -- don't let that become a literal "None"
                charLine = "\\{} {} ".format( charStyle, element.text or '' )
                # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                for subelement in element:
                    sublocation = subelement.tag + ' ' + location
                    #print( c, v, element.tag )
                    if subelement.tag == 'char': # an embedded character field
                        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                        # Process the attributes first
                        subCharStyle, charClosed = None, True
                        for attrib,value in subelement.items():
                            if attrib=='style':
                                subCharStyle = value
                            elif attrib=='closed':
                                assert( value=='false' )
                                charClosed = False
                            else:
                                logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        charLine += "\\{} {}".format( subCharStyle, subelement.text or '' )
                        if charClosed: charLine += "\\{}*".format( subCharStyle )
                        charLine += '' if subelement.tail is None else subelement.tail.strip()
                    else:
                        logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                        self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                # A character field must be added to the previous field
                charLine += "\\{}*{}".format( charStyle, '' if element.tail is None else element.tail.strip() )
                if debuggingThisModule:
                    print( "USX.loadParagraph:", c, v, paragraphStyle, charStyle, repr(charLine) )
                self.appendToLastLine( charLine )
            elif element.tag == 'note':
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                # Process the attributes first
                noteStyle = noteCaller = None
                for attrib,value in element.items():
                    if attrib=='style':
                        noteStyle = value # This is basically the USFM marker name
                        assert( noteStyle in ('x','f',) )
                    elif attrib=='caller':
                        noteCaller = value # Usually hyphen or a symbol to be used for the note
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( noteStyle and noteCaller ) # both compulsory
                noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                # NOTE: The tail of each subelement is appended exactly once
                #   (stripped for a char, unchanged for anything else)
                for subelement in element:
                    sublocation = subelement.tag + ' ' + location
                    #print( c, v, element.tag )
                    if subelement.tag == 'char': # a character field inside the note
                        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                        # Process the attributes first
                        charStyle, charClosed = None, True
                        for attrib,value in subelement.items():
                            if attrib=='style':
                                charStyle = value
                            elif attrib=='closed':
                                assert( value=='false' )
                                charClosed = False
                            else:
                                logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        noteLine += "\\{} {}".format( charStyle, subelement.text or '' )
                        if charClosed: noteLine += "\\{}*".format( charStyle )
                        noteLine += '' if subelement.tail is None else subelement.tail.strip()
                    elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                        BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                        BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                        # Process the attributes first
                        unmmatchedMarker = None
                        for attrib,value in subelement.items():
                            if attrib=='marker':
                                unmmatchedMarker = value
                            else:
                                logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                        self.addPriorityError( 2, c, v, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                        if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                    else:
                        logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                        self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                        if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                noteLine += "\\{}*".format( noteStyle )
                if element.tail:
                    noteText = element.tail.strip()
                    noteLine += noteText
                self.appendToLastLine( noteLine )
            elif element.tag == 'link': # Used to include extra resources
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoTail( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                # Process the attributes first
                linkStyle = linkDisplay = linkTarget = None
                for attrib,value in element.items():
                    if attrib=='style':
                        linkStyle = value
                        assert( linkStyle in ('jmp',) )
                    elif attrib=='display':
                        linkDisplay = value # e.g., "click here"
                    elif attrib=='target':
                        linkTarget = value # e.g., some reference
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # Links aren't converted -- they're only recorded as a priority error
                self.addPriorityError( 3, c, v, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )
            elif element.tag == 'unmatched': # Used to denote errors in the source text
                BibleOrgSysGlobals.checkXMLNoText( element, location )
                BibleOrgSysGlobals.checkXMLNoTail( element, location )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                self.addPriorityError( 2, c, v, _("Unmatched element in {}").format( location) )
            else:
                logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, location ) )
                self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                # Display the last few lines that were loaded (to help find the problem)
                for x in range(max(0,len(self)-10),len(self)):
                    print( x, self._rawLines[x] )
                # NOTE(review): 'halt' is not defined in this function -- presumably a deliberate
                #   NameError to stop immediately when debugging -- confirm
                if BibleOrgSysGlobals.debugFlag: halt
    # end of loadParagraph

    if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Loading {}...").format( filename ) )
    self.isOneChapterBook = self.BBB in BibleOrgSysGlobals.BibleBooksCodes.getSingleChapterBooksList()
    self.sourceFilename = filename
    self.sourceFolder = folder
    self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
    self.tree = ElementTree().parse( self.sourceFilepath )
    assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

    c = v = '0' # Current chapter and verse (as strings) -- also updated by loadParagraph
    loadErrors = []
    # NOTE(review): lastMarker/lastText are only set by the unknown marker recovery code below
    #   and a pending pair is never written after the loop finishes -- confirm that's intended
    lastMarker = None

    # Find the main container
    if self.tree.tag=='usx' or self.tree.tag=='usfm': # Not sure why both are allowable
        location = "USX ({}) file".format( self.tree.tag )
        BibleOrgSysGlobals.checkXMLNoText( self.tree, location )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, location )

        # Process the attributes first
        self.schemaLocation = ''
        version = None
        for attrib,value in self.tree.items():
            if attrib=='version':
                version = value
            else: # Only complain about the attributes that we didn't handle
                logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        if version not in ( None, '2.0' ):
            logging.warning( _("Not sure if we can handle v{} USX files").format( version ) )

        # Now process the data
        for element in self.tree:
            sublocation = element.tag + " " + location
            if element.tag == 'book': # milestone (not a container)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                # Process the attributes
                idField = bookStyle = None
                for attrib,value in element.items():
                    if attrib=='id' or attrib=='code':
                        idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                        #if idField != BBB:
                        #    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                    elif attrib=='style':
                        bookStyle = value
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if bookStyle != 'id':
                    logging.warning( _("Unexpected style attribute ({}) in {}").format( bookStyle, sublocation ) )
                idLine = idField
                if element.text and element.text.strip(): idLine += ' ' + element.text
                self.addLine( 'id', idLine )
            elif element.tag == 'chapter': # milestone (not a container)
                v = '0' # Reset the verse number at each new chapter
                BibleOrgSysGlobals.checkXMLNoText( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                # Process the attributes
                chapterStyle = None
                for attrib,value in element.items():
                    if attrib=='number':
                        c = value
                    elif attrib=='style':
                        chapterStyle = value
                    else:
                        logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if chapterStyle != 'c':
                    logging.warning( _("Unexpected style attribute ({}) in {}").format( chapterStyle, sublocation ) )
                self.addLine( 'c', c )
            elif element.tag == 'para':
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                USFMMarker = element.attrib['style'] # Get the USFM code for the paragraph style
                if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( USFMMarker ):
                    #if lastMarker: self.addLine( lastMarker, lastText )
                    #lastMarker, lastText = USFMMarker, text
                    loadParagraph( element, sublocation )
                elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( USFMMarker ): # the line begins with an internal USFM Marker -- append it to the previous line
                    text = element.text
                    if text is None: text = ''
                    if BibleOrgSysGlobals.debugFlag:
                        print( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                        #halt # Not checked yet
                    if text:
                        loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                        logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, c, v, text ) )
                    else: # no text
                        loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM Marker at beginning of line (with no text)").format( self.BBB, c, v, USFMMarker ) )
                        logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, c, v ) )
                    self.addPriorityError( 97, c, v, _("Found \\{} internal USFM Marker on new line in file").format( USFMMarker ) )
                    #lastText += '' if lastText.endswith(' ') else ' ' # Not always good to add a space, but it's their fault!
                    lastText = '\\' + USFMMarker + ' ' + text
                    #print( "{} {} {} Now have {}:{!r}".format( self.BBB, c, v, lastMarker, lastText ) )
                else: # the line begins with an unknown USFM Marker
                    text = element.text
                    if text is None: text = '' # so that it can safely be concatenated below
                    if text:
                        loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                        logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, c, v, text ) )
                    else: # no text
                        loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line (with no text").format( self.BBB, c, v, USFMMarker ) )
                        logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, c, v ) )
                    self.addPriorityError( 100, c, v, _("Found \\{} unknown USFM Marker on new line in file").format( USFMMarker ) )
                    for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                        if USFMMarker.startswith( tryMarker ): # Let's try changing it
                            if lastMarker: self.addLine( lastMarker, lastText )
                            lastMarker, lastText = tryMarker, USFMMarker[len(tryMarker):] + ' ' + text
                            loadErrors.append( _("{} {}:{} Changed '\\{}' unknown USFM Marker to {!r} at beginning of line: {}").format( self.BBB, c, v, USFMMarker, tryMarker, text ) )
                            logging.warning( _("Changed '\\{}' unknown USFM Marker to {!r} after {} {}:{} at beginning of line: {}").format( USFMMarker, tryMarker, self.BBB, c, v, text ) )
                            break
                    # Otherwise, don't bother processing this line -- it'll just cause more problems later on
            else:
                logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, sublocation ) )
                self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

    if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
def __validateSystem(self, systemName):
    """
    Checks for basic formatting/content errors in a Bible book name system.

    Every problem found is reported through the logging module -- nothing is
        returned and nothing is raised for bad data (although the two asserts
        at the top will fail for an empty system name or a missing/empty tree).

    For each record in self.__XMLSystems[systemName]['tree'] whose tag is in
        self.mainElementTags, we check the compulsory, optional, unexpected
        and must-be-unique attributes and then the same for the subelements.
        The various self.compulsoryXXX, self.optionalXXX and self.uniqueXXX
        lists are indexed by the position of the tag in self.mainElementTags.
    """
    assert systemName
    assert self.__XMLSystems[systemName]['tree']

    if len(self.__XMLSystems[systemName]["languageCode"]) != 3:
        logging.error(
            _("Couldn't find 3-letter language code in {!r} book names system"
              ).format(systemName))
    # TODO: Could also check that we have a valid ISO-639-3 language code (if self.__ISOLanguages is available)

    # Create an empty list for each field that's required to contain unique data
    uniqueDict = {}
    for index in range(len(self.mainElementTags)):
        for elementName in self.uniqueElements[index]:
            uniqueDict["Element_" + str(index) + "_" + elementName] = []
        for attributeName in self.uniqueAttributes[index]:
            uniqueDict["Attribute_" + str(index) + "_" + attributeName] = []

    for k, element in enumerate(self.__XMLSystems[systemName]['tree']):
        if element.tag not in self.mainElementTags:
            logging.warning(
                _("Unexpected element: {} in record {} in {}").format(
                    element.tag, k, systemName))
            continue

        BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        if not self.compulsoryAttributes and not self.optionalAttributes:
            BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
        if not self.compulsoryElements and not self.optionalElements:
            BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        index = self.mainElementTags.index(element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self.compulsoryAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    _("Compulsory {!r} attribute is missing from {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))
            elif not attributeValue:  # Present but empty (a missing one has already been reported above)
                logging.warning(
                    _("Compulsory {!r} attribute is blank on {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))

        # Check optional attributes on this main element
        for attributeName in self.optionalAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    _("Optional {!r} attribute is blank on {} element in record {} in {}"
                      ).format(attributeName, element.tag, k, systemName))

        # Check for unexpected additional attributes on this main element
        for attributeName, attributeValue in element.items():
            if attributeName not in self.compulsoryAttributes[index] \
            and attributeName not in self.optionalAttributes[index]:
                logging.warning(
                    _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}"
                      ).format(attributeName, attributeValue, element.tag, k, systemName))

        # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
        for attributeName in self.uniqueAttributes[index]:
            attributeValue = element.get(attributeName)
            if attributeValue is not None:
                seenValues = uniqueDict["Attribute_" + str(index) + "_" + attributeName]
                if attributeValue in seenValues:
                    logging.error(
                        _("Found {!r} data repeated in {!r} field on {} element in record {} in {}"
                          ).format(attributeValue, attributeName, element.tag, k, systemName))
                seenValues.append(attributeValue)

        # Check compulsory elements
        for elementName in self.compulsoryElements[index]:
            subelement = element.find(elementName)
            if subelement is None:
                logging.error(
                    _("Compulsory {!r} element is missing (record {}) in {}"
                      ).format(elementName, k, systemName))
            elif not subelement.text:  # Must be elif -- there's no .text to look at if the element is missing
                logging.warning(
                    _("Compulsory {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check optional elements
        for elementName in self.optionalElements[index]:
            subelement = element.find(elementName)
            if subelement is not None and not subelement.text:
                logging.warning(
                    _("Optional {!r} element is blank (record {}) in {}"
                      ).format(elementName, k, systemName))

        # Check for unexpected additional elements
        for subelement in element:
            if subelement.tag not in self.compulsoryElements[index] \
            and subelement.tag not in self.optionalElements[index]:
                logging.warning(
                    _("Additional {!r} element ({!r}) found (record {}) in {} {}"
                      ).format(subelement.tag, subelement.text, k, systemName, element.tag))

        # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
        for elementName in self.uniqueElements[index]:
            subelement = element.find(elementName)
            if subelement is not None:
                text = subelement.text
                seenTexts = uniqueDict["Element_" + str(index) + "_" + elementName]
                if text in seenTexts:
                    # Repeats are only reported at info level for BibleDivisionNames records
                    myLogging = logging.info if element.tag == 'BibleDivisionNames' else logging.error
                    myLogging(
                        _("Found {!r} data repeated in {!r} element (record {}) in {}"
                          ).format(text, elementName, k, systemName))
                seenTexts.append(text)
print( gsc ) # Just print a summary if BibleOrgSysGlobals.commandLineArguments.export: print( "Exports aren't written yet!" ) #hlc.exportDataToPython() # Produce the .py tables #hlc.exportDataToC() # Produce the .h tables halt if 1: # demonstrate the Greek Lexicon class if BibleOrgSysGlobals.verbosityLevel > 1: print( "\nDemonstrating the Greek Lexicon class…" ) hl = GreekLexicon( testFolder ) # Load and process the XML print( hl ) # Just print a summary print() for strongsKey in ('G1','G123','G165','G1732','G1979','G2011','G5624','G5625',): # Last one is invalid print( '\n' + strongsKey ) print( " Data:", hl.getStrongsEntryData( strongsKey ) ) print( " Pronunciation:", hl.getStrongsEntryField( strongsKey, 'pronunciation' ) ) print( " HTML:", hl.getStrongsEntryHTML( strongsKey ) ) # end of demo if __name__ == '__main__': # Configure basic set-up parser = BibleOrgSysGlobals.setup( ProgName, ProgVersion ) BibleOrgSysGlobals.addStandardOptionsAndProcess( parser, exportAvailable=True ) demo() BibleOrgSysGlobals.closedown( ProgName, ProgVersion ) # end of GreekLexicon.py
def OpenSongXMLBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Search the given folder for OpenSong XML Bible files and, if none
        are found there, search each of its immediate subfolders.

    Returns False if the given folder is unreadable or isn't a folder.
    Returns None if nothing was found.
    If exactly one file was found and autoLoad or autoLoadBooks is set,
        returns an OpenSongXMLBible object for it
        (load() is only called on it if autoLoadBooks is set).
    Otherwise returns the number of files found.

    If strictCheck (or BibleOrgSysGlobals.strictCheckingFlag) is set,
        the first two lines of each file are checked before it's counted.
    """
    # All the ways that we accept the first line of the file starting
    xmlDeclarationStarts = (
        '<?xml version="1.0"', "<?xml version='1.0'",
        '\ufeff<?xml version="1.0"', "\ufeff<?xml version='1.0'",  # same but with BOM
        )

    def isWantedFilename(filename):
        """ Returns False if the filename has one of the endings or extensions that we ignore. """
        filenameUpper = filename.upper()
        if any(filenameUpper.endswith(ending) for ending in filenameEndingsToIgnore):
            return False
        extensionUpper = os.path.splitext(filenameUpper)[1]
        return extensionUpper[1:] not in extensionsToIgnore  # Compare without the first dot

    def looksLikeOpenSong(filename, folderName):
        """ Peeks at the first two lines of the file and returns True if they're what we expect. """
        firstLines = BibleOrgSysGlobals.peekIntoFile(filename, folderName, numLines=2)
        #print( 'osx1b', firstLines )
        if not firstLines or len(firstLines) < 2:
            return False
        if not firstLines[0].startswith(xmlDeclarationStarts):
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print("OSB (unexpected) first line was {!r} in {}".format(
                    firstLines, filename))
            return False
        return firstLines[1].startswith('<bible>')

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("OpenSongXMLBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False,)
        assert autoLoadBooks in (True, False,)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("OpenSongXMLBibleFileCheck: Given {!r} folder is unreadable").
            format(givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("OpenSongXMLBibleFileCheck: Given {!r} path is not a folder").
            format(givenFolderName))
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(
            " OpenSongXMLBibleFileCheck: Looking for files in given {}".format(
                givenFolderName))
    foundFolders, foundFiles = [], []
    for entryName in os.listdir(givenFolderName):
        entryPath = os.path.join(givenFolderName, entryName)
        if os.path.isdir(entryPath):
            if entryName != '__MACOSX':  # don't visit these directories
                foundFolders.append(entryName)
        elif os.path.isfile(entryPath) and isWantedFilename(entryName):
            foundFiles.append(entryName)
    #print( 'osx1', foundFiles )

    # See if there's an OpenSong project here in this folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if (strictCheck or BibleOrgSysGlobals.strictCheckingFlag) \
        and not looksLikeOpenSong(thisFilename, givenFolderName):
            continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("OpenSongXMLBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            osb = OpenSongXMLBible(givenFolderName, lastFilenameFound)
            if autoLoadBooks:
                osb.load()  # Load and process the file
            return osb
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Looked hopeful but no actual files found")

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" OpenSongXMLBibleFileCheck: Looking for files in {}".
                  format(tryFolderName))
        foundSubfiles = []
        for entryName in os.listdir(tryFolderName):
            entryPath = os.path.join(givenFolderName, thisFolderName, entryName)
            if os.path.isdir(entryPath):
                continue  # We don't go down any further levels
            if os.path.isfile(entryPath) and isWantedFilename(entryName):
                foundSubfiles.append(entryName)
        #print( 'osx2', foundSubfiles )

        # See if there's an OS project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if (strictCheck or BibleOrgSysGlobals.strictCheckingFlag) \
            and not looksLikeOpenSong(thisFilename, tryFolderName):
                continue
            foundProjects.append((tryFolderName, thisFilename,))
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("OpenSongXMLBibleFileCheck foundProjects", numFound,
                  foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag:
                assert len(foundProjects) == 1
            projectFolder, projectFilename = foundProjects[0]  # Folder and filename
            osb = OpenSongXMLBible(projectFolder, projectFilename)
            if autoLoadBooks:
                osb.load()  # Load and process the file
            return osb
        return numFound
def createOpenSongXML(BibleObject, outputFolder=None, controlDict=None, validationSchema=None):
    """
    Export the given Bible object as a UTF-8 OpenSong XML file, plus a zipped copy.

    This format is roughly documented at http://de.wikipedia.org/wiki/OpenSong_XML
        but more fields can be discovered by looking at downloaded files.

    Parameters:
        BibleObject: object providing .books (a mapping of book code to book data)
            and, for the generic case, .genericBOS and .genericBRL.
        outputFolder: folder passed to MLWriter and used to build the zip path.
            NOTE(review): the zip step calls os.path.join(outputFolder, ...), so
            None will fail there -- confirm that callers always supply a folder.
        controlDict: optional settings; the keys read here are 'PublicationCode'
            and 'OpenSongOutputFilename' (default 'Bible.osong'). None is treated
            as an empty dict.
        validationSchema: if truthy, the written file is validated against it.

    Returns the result of the writer's validate() call if a validationSchema
        was given, otherwise True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print("Running createOpenSongXML…")
    if BibleOrgSysGlobals.debugFlag:
        assert BibleObject.books

    # The signature defaults controlDict to None, but the code below does
    #   membership tests and key look-ups on it, so normalise it first.
    if controlDict is None:
        controlDict = {}

    ignoredMarkers, unhandledMarkers, unhandledBooks = set(), set(), []

    def writeOpenSongBook(writerObject, BBB, bkData):
        """Writes a book to the OpenSong XML writerObject."""
        OSISAbbrev = BibleOrgSysGlobals.BibleBooksCodes.getOSISAbbreviation(BBB)
        if not OSISAbbrev:
            logging.warning(
                "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
                .format(BBB))
            unhandledBooks.append(BBB)
            return

        writerObject.writeLineOpen('b', ('n', bkData.getAssumedBookNames()[0]))
        haveOpenChapter, startedFlag, gotVP, accumulator = False, False, None, ""
        # Pre-set so that verse text following a 'v' record with no number
        #   can still be flushed instead of raising UnboundLocalError.
        verseNumberString = ''
        C, V = '-1', '-1'  # So first/id line starts at -1:0
        for processedBibleEntry in bkData._processedLines:  # Process internal Bible data lines
            marker = processedBibleEntry.getMarker()
            text = processedBibleEntry.getCleanText()
            extras = processedBibleEntry.getExtras()

            if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                continue  # Just ignore added markers -- not needed here
            if marker in USFM_PRECHAPTER_MARKERS:
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                    assert C == '-1' or marker == 'rem' or marker.startswith('mte')
                V = str(int(V) + 1)

            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in ('ie',):
                # Just ignore these lines
                ignoredMarkers.add(marker)
            elif marker == 'c':
                # Flush any pending verse, close the previous chapter, open the new one
                if accumulator:
                    writerObject.writeLineOpenClose('v', accumulator, ('n', verseNumberString))
                    accumulator = ''
                if haveOpenChapter:
                    writerObject.writeLineClose('c')
                C, V = text, '0'
                writerObject.writeLineOpen('c', ('n', text))
                haveOpenChapter = True
            elif marker in ('c#',):
                # These are the markers that we can safely ignore for this export
                ignoredMarkers.add(marker)
            elif marker == 'vp#':
                # This precedes a v field and has the verse number to be printed
                gotVP = text  # Just remember it for now
            elif marker == 'v':
                V = text
                if gotVP:  # this is the verse number to be published
                    text = gotVP
                    gotVP = None
                startedFlag = True
                if accumulator:  # Flush the previous verse
                    writerObject.writeLineOpenClose('v', accumulator, ('n', verseNumberString))
                    accumulator = ''
                if not text:
                    logging.warning("createOpenSongXML: Missing text for v")
                    continue
                # Used below but remove anything that'll cause a big XML problem later
                verseNumberString = text.replace('<', '').replace('>', '').replace('"', '')
            elif marker in ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4', 'ms1','ms2','ms3','ms4', ) \
            or marker in USFM_ALL_INTRODUCTION_MARKERS \
            or marker in ('s1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd', 'cl','lit', ):
                ignoredMarkers.add(marker)
            elif marker in USFM_BIBLE_PARAGRAPH_MARKERS:
                if BibleOrgSysGlobals.debugFlag:
                    assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in ('b', 'nb', 'ib',):
                if BibleOrgSysGlobals.debugFlag:
                    assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in ('v~', 'p~',):
                if BibleOrgSysGlobals.debugFlag:
                    assert text or extras
                if not text:  # this is an empty (untranslated) verse
                    text = '- - -'  # but we'll put in a filler
                if startedFlag:  # Text before the first verse marker is not accumulated
                    accumulator += (' ' if accumulator else '') + BibleOrgSysGlobals.makeSafeXML(text)
            else:
                if text:
                    logging.warning(
                        "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                        format(marker, BBB, C, V, text))
                if extras:
                    logging.warning(
                        "toOpenSong: lost extras in {} field in {} {}:{}".
                        format(marker, BBB, C, V))
                unhandledMarkers.add(marker)
            if extras and marker not in ('v~', 'p~',) and marker not in ignoredMarkers:
                logging.critical(
                    "toOpenSong: extras not handled for {} at {} {}:{}".format(
                        marker, BBB, C, V))

        # Flush the final verse and close any open containers
        if accumulator:
            writerObject.writeLineOpenClose('v', accumulator, ('n', verseNumberString))
        if haveOpenChapter:
            writerObject.writeLineClose('c')
        writerObject.writeLineClose('b')
    # end of createOpenSongXML.writeOpenSongBook

    # Set-up our Bible reference system
    # (BOS/BRL are not used further in this function; the constructions are
    #   kept so that behaviour for a given PublicationCode is unchanged.)
    if 'PublicationCode' not in controlDict or controlDict['PublicationCode'] == 'GENERIC':
        BOS = BibleObject.genericBOS
        BRL = BibleObject.genericBRL
    else:
        BOS = BibleOrganisationalSystem(controlDict['PublicationCode'])
        BRL = BibleReferenceList(BOS, BibleObject=None)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_(" Exporting to OpenSong format…"))
    osOFn = controlDict.get('OpenSongOutputFilename', 'Bible.osong')
    filename = BibleOrgSysGlobals.makeSafeFilename(osOFn)
    xw = MLWriter(filename, outputFolder)
    xw.setHumanReadable()
    xw.start()
    # NOTE(review): the root element is written as 'Bible' whereas the
    #   OpenSong file check in this file looks for '<bible>' -- confirm intended.
    xw.writeLineOpen('Bible')
    for BBB, bookData in BibleObject.books.items():
        writeOpenSongBook(xw, BBB, bookData)
    xw.writeLineClose('Bible')
    xw.close()

    if ignoredMarkers:
        logging.info("createOpenSongXML: Ignored markers were {}".format(ignoredMarkers))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" " + _("WARNING: Ignored createOpenSongXML markers were {}").format(ignoredMarkers))
    if unhandledMarkers:
        logging.warning("createOpenSongXML: Unhandled markers were {}".format(unhandledMarkers))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " + _("WARNING: Unhandled toOpenSong markers were {}").format(unhandledMarkers))
    if unhandledBooks:
        logging.warning("createOpenSongXML: Unhandled books were {}".format(unhandledBooks))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(" " + _("WARNING: Unhandled createOpenSongXML books were {}").format(unhandledBooks))

    # Now create a zipped version
    filepath = os.path.join(outputFolder, filename)
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Zipping {} OpenSong file…".format(filename))
    # The context manager closes the archive even if the write fails
    with zipfile.ZipFile(filepath + '.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(filepath, filename)

    if validationSchema:
        return xw.validate(validationSchema)
    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print(" createOpenSongXML finished successfully.")
    return True
def CSVBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Given a folder, search for CSV Bible files or folders in the folder and in the next level down.

    Candidate files are those ending in '.txt'. When strictCheck (or the
        global strictCheckingFlag) is set, the first line of each candidate
        must start with one of the recognised CSV header / first-row prefixes.
        The same test is applied in the given folder and in its subfolders.

    Returns False if an error is found (folder unreadable or not a folder).

    if autoLoad is false (default)
        returns None, or the number of Bibles found.

    if autoLoad (or autoLoadBooks) is true and exactly one CSV Bible is found,
        returns the CSVBible object (loaded only if autoLoadBooks is true).
    """
    # First-line prefixes that identify a CSV Bible (quoted or unquoted,
    #   with or without a header row)
    csvFirstLinePrefixes = (
        '"Book","Chapter","Verse",', '"1","1","1",',
        'Book,Chapter,Verse,', '1,1,1,',
    )

    def listFolderContents(folderPath):
        """Return (subfolderNames, candidateFilenames) for folderPath, skipping ignorable files."""
        folders, files = [], []
        for something in os.listdir(folderPath):
            somepath = os.path.join(folderPath, something)
            if os.path.isdir(somepath):
                folders.append(something)
            elif os.path.isfile(somepath):
                somethingUpper = something.upper()
                if any(somethingUpper.endswith(ending) for ending in filenameEndingsToIgnore):
                    continue
                somethingUpperExt = os.path.splitext(somethingUpper)[1]
                if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                    files.append(something)
        return folders, files

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("CSVBibleFileCheck( {}, {}, {} )".format(givenFolderName, strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert (givenFolderName and isinstance(givenFolderName, str))
    if BibleOrgSysGlobals.debugFlag:
        assert (autoLoad in (True, False,) and autoLoadBooks in (True, False,))

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("CSVBibleFileCheck: Given {} folder is unreadable").format(repr(givenFolderName)))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("CSVBibleFileCheck: Given {} path is not a folder").format(repr(givenFolderName)))
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" CSVBibleFileCheck: Looking for files in given {}".format(repr(givenFolderName)))
    foundFolders, foundFiles = listFolderContents(givenFolderName)
    if '__MACOSX' in foundFolders:
        foundFolders.remove('__MACOSX')  # don't visit these directories

    # See if there's an CSV Bible here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename in ('book_names.txt', 'Readme.txt'):
            looksHopeful = True
        elif thisFilename.endswith('.txt'):
            if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, givenFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if not firstLine.startswith(csvFirstLinePrefixes):
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print(
                            "CSVBibleFileCheck: (unexpected) first line was {!r} in {}"
                            .format(firstLine, thisFilename))
                    continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("CSVBibleFileCheck got", numFound, givenFolderName, lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = CSVBible(givenFolderName, lastFilenameFound[:-4])  # Remove the end of the actual filename ".txt"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Looked hopeful but no actual files found")

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("CSVBibleFileCheck: {!r} subfolder is unreadable").format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" CSVBibleFileCheck: Looking for files in {}".format(tryFolderName))
        foundSubfolders, foundSubfiles = listFolderContents(tryFolderName)

        # See if there's an CSV Bible here in this folder
        for thisFilename in sorted(foundSubfiles):
            if thisFilename.endswith('.txt'):
                if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                    firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, tryFolderName)
                    if firstLine is None:
                        continue  # seems we couldn't decode the file
                    # Same CSV signature test as for the top-level folder.
                    #   (This used to test for "Ge 1:1 ", which is not a CSV
                    #   first line, so CSV Bibles in subfolders were rejected.)
                    if not firstLine.startswith(csvFirstLinePrefixes):
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print(
                                "CSVBibleFileCheck: (unexpected) first line was {!r} in {}"
                                .format(firstLine, thisFilename))
                        if debuggingThisModule:
                            halt  # Deliberate stop (undefined name) when debugging this module
                        continue
                foundProjects.append((tryFolderName, thisFilename,))
                lastFilenameFound = thisFilename
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("CSVBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag:
                assert (len(foundProjects) == 1)
            uB = CSVBible(foundProjects[0][0], foundProjects[0][1][:-4])  # Remove the end of the actual filename ".txt"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
def HaggaiXMLBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for Haggai XML Bible files or folders in the folder and in the next level down.

    When strictCheck (or the global strictCheckingFlag) is set, a file only
        counts if its first line is an XML 1.0 declaration (optionally
        preceded by a BOM) and its second line contains 'haggai_'.

    Returns False if an error is found (folder unreadable or not a folder).

    if autoLoad is false (default)
        returns None, or the number found.

    if autoLoad (or autoLoadBooks) is true and exactly one Haggai Bible is found,
        returns the HaggaiXMLBible object (loaded only if autoLoadBooks is true).
    """
    # Accepted starts for the first line: either quote style, with or without BOM
    xmlDeclarationStarts = (
        '<?xml version="1.0"', "<?xml version='1.0'",
        '\ufeff<?xml version="1.0"', "\ufeff<?xml version='1.0'",
    )

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print( "HaggaiXMLBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag: assert givenFolderName and isinstance( givenFolderName, str )
    if BibleOrgSysGlobals.debugFlag: assert autoLoad in (True,False,)

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("HaggaiXMLBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("HaggaiXMLBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print( " HaggaiXMLBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders, foundFiles = [], []
    for something in os.listdir( givenFolderName ):
        somepath = os.path.join( givenFolderName, something )
        if os.path.isdir( somepath ):
            if something == '__MACOSX': continue # don't visit these directories
            foundFolders.append( something )
        elif os.path.isfile( somepath ):
            somethingUpper = something.upper()
            somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
            if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
                continue
            if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                foundFiles.append( something )

    # See if there's an Haggai project here in this folder
    numFound = 0
    looksHopeful = False # Never set in this function, so the "looked hopeful" message below cannot fire
    lastFilenameFound = None
    for thisFilename in sorted( foundFiles ):
        if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
            firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, givenFolderName, numLines=2 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( xmlDeclarationStarts ):
                if BibleOrgSysGlobals.verbosityLevel > 2:
                    print( "HB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                continue
            if 'haggai_' not in firstLines[1]: continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "HaggaiXMLBibleFileCheck got", numFound, givenFolderName, lastFilenameFound )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            ub = HaggaiXMLBible( givenFolderName, lastFilenameFound )
            if autoLoadBooks: ub.load() # Load and process the file
            return ub
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print( " Looked hopeful but no actual files found" )

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        # Skip (rather than crash in os.listdir on) a subfolder that we can't read
        if not os.access( tryFolderName, os.R_OK ):
            logging.warning( _("HaggaiXMLBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( " HaggaiXMLBibleFileCheck: Looking for files in {}".format( tryFolderName ) )
        foundSubfolders, foundSubfiles = [], []
        for something in os.listdir( tryFolderName ):
            somepath = os.path.join( givenFolderName, thisFolderName, something )
            if os.path.isdir( somepath ):
                foundSubfolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
                    continue
                if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                    foundSubfiles.append( something )

        # See if there's a Haggai project here in this folder
        for thisFilename in sorted( foundSubfiles ):
            if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, tryFolderName, numLines=2 )
                if not firstLines or len(firstLines)<2: continue
                if not firstLines[0].startswith( xmlDeclarationStarts ):
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print( "HB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                    continue
                if 'haggai_' not in firstLines[1]: continue
            foundProjects.append( (tryFolderName, thisFilename,) )
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "HaggaiXMLBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            ub = HaggaiXMLBible( foundProjects[0][0], foundProjects[0][1] ) # Folder and filename
            if autoLoadBooks: ub.load() # Load and process the file
            return ub
        return numFound
assert mark not in otherMarks assert mark not in cantillationMarks for j, mark in enumerate(otherMarks): #print( j, mark ) assert otherMarks.count(mark) == 1 assert mark not in consonants assert mark not in vowelPoints assert mark not in cantillationMarks for j, mark in enumerate(cantillationMarks): #print( j, mark ) assert cantillationMarks.count(mark) == 1 assert mark not in consonants assert mark not in vowelPoints assert mark not in otherMarks BibleOrgSysGlobals.printUnicodeInfo(vowelPoints, "Vowel points") BibleOrgSysGlobals.printUnicodeInfo(cantillationMarks, "Cantillation marks") class Hebrew(): """ Class for handling a Hebrew string. """ def __init__(self, text): """ Create an new Hebrew object. """ self.originalText = text self.currentText = text # end of Hebrew.__init__
def loadParagraph( paragraphXML, paragraphlocation ): """ Load a paragraph from the USX XML. In this context, paragraph means heading and intro lines, as well as paragraphs of verses. Uses (and updates) C,V information from the containing function. """ nonlocal C, V # Process the attributes first paragraphStyle = None for attrib,value in paragraphXML.items(): if attrib=='style': paragraphStyle = value # This is basically the USFM marker name else: logging.warning( _("CH46 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) # Now process the paragraph text (or write a paragraph marker anyway) paragraphText = paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else '' if version is None: paragraphText = paragraphText.rstrip() # Don't need to strip extra spaces in v2 self.addLine( paragraphStyle, paragraphText ) # Now process the paragraph subelements for element in paragraphXML: location = element.tag + ' ' + paragraphlocation #print( "USXXMLBibleBook.load", C, V, element.tag, location ) if element.tag == 'verse': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoText( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) # Process the attributes first verseStyle = altNumber = None for attrib,value in element.items(): if attrib=='number': V = value elif attrib=='style': verseStyle = value elif attrib=='altnumber': altNumber = value else: logging.error( _("KR60 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) if verseStyle != 'v': logging.error( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) ) #if altNumber: print( repr(verseStyle), repr(altNumber) ); halt altStuff = ' \\va {}\\va*'.format( altNumber ) if altNumber else '' self.addLine( verseStyle, V + altStuff + ' ' ) # Now process the tail (if there's one) which is the verse text if element.tail: vText = element.tail if vText[0]=='\n': vText = vText.lstrip() # Paratext puts cross references 
on a new line if vText: #print( repr(vText) ) self.appendToLastLine( vText ) elif element.tag == 'char': # Process the attributes first charStyle = None for attrib,value in element.items(): if attrib=='style': charStyle = value # This is basically the USFM character marker name #print( " charStyle", charStyle ) assert not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle ) else: logging.error( _("QU52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) charLine = "\\{} {} ".format( charStyle, element.text ) # Now process the subelements -- chars are one of the few multiply embedded fields in USX for subelement in element: sublocation = subelement.tag + ' ' + location #print( '{} {}:{} {}'.format( self.BBB, C, V, element.tag ) ) if subelement.tag == 'char': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation ) # Process the attributes first subCharStyle, charClosed = None, True for attrib,value in subelement.items(): if attrib=='style': subCharStyle = value elif attrib=='closed': assert value=='false' charClosed = False else: logging.error( _("KS41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) charLine += "\\{} {}".format( subCharStyle, subelement.text ) if charClosed: charLine += "\\{}*".format( subCharStyle ) #if subelement.tail is not None: print( " tail1", repr(subelement.tail) ) charLine += '' if subelement.tail is None else subelement.tail else: logging.error( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) ) self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) ) # A character field must be added to the previous field #if element.tail is not None: print( " tail2", repr(element.tail) ) charTail = '' if element.tail: charTail = element.tail if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts footnote parts on new lines charLine += "\\{}*{}".format( 
charStyle, charTail ) #if debuggingThisModule: print( "USX.loadParagraph:", C, V, paragraphStyle, charStyle, repr(charLine) ) self.appendToLastLine( charLine ) elif element.tag == 'note': #print( "NOTE", BibleOrgSysGlobals.elementStr( element ) ) # Process the attributes first noteStyle = noteCaller = None for attrib,value in element.items(): if attrib=='style': noteStyle = value # This is basically the USFM marker name assert noteStyle in ('x','f',) elif attrib=='caller': noteCaller = value # Usually hyphen or a symbol to be used for the note else: logging.error( _("CY38 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) if noteCaller=='' and self.BBB=='NUM' and C=='10' and V=='36': noteCaller = '+' # Hack assert noteStyle and noteCaller # both compulsory noteLine = "\\{} {} ".format( noteStyle, noteCaller ) if element.text: noteText = element.text.strip() noteLine += noteText # Now process the subelements -- notes are one of the few multiply embedded fields in USX for subelement in element: sublocation = subelement.tag + ' ' + location #print( C, V, subelement.tag ) if subelement.tag == 'char': # milestone (not a container) # Process the attributes first charStyle, charClosed = None, True for attrib,value in subelement.items(): if attrib=='style': charStyle = value elif attrib=='closed': assert value=='false' charClosed = False else: logging.warning( _("GJ67 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) noteLine += "\\{} {}".format( charStyle, subelement.text ) # Now process the subelements -- notes are one of the few multiply embedded fields in USX for sub2element in subelement: sub2location = sub2element.tag + ' ' + sublocation #print( C, V, sub2element.tag ) if sub2element.tag == 'char': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location ) # Process the attributes first char2Style, char2Closed = None, True for attrib,value in sub2element.items(): if 
attrib=='style': char2Style = value elif attrib=='closed': assert value=='false' char2Closed = False else: logging.warning( _("VH36 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) ) assert char2Closed noteLine += "\\{} {}\\{}*{}".format( char2Style, sub2element.text, char2Style, sub2element.tail if sub2element.tail else '' ) if charClosed: noteLine += "\\{}*".format( charStyle ) if subelement.tail: charTail = subelement.tail if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts cross reference parts on a new line noteLine += charTail elif subelement.tag == 'unmatched': # Used to denote errors in the source text BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation ) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation ) # Process the attributes first unmmatchedMarker = None for attrib,value in subelement.items(): if attrib=='marker': unmmatchedMarker = value else: logging.warning( _("NV21 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) self.addPriorityError( 2, C, V, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) ) else: logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) ) self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) ) if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail #noteLine += "\\{}*".format( charStyle ) noteLine += "\\{}*".format( noteStyle ) if element.tail: #if '\n' in element.tail: halt noteTail = element.tail if noteTail[0]=='\n': noteTail = noteTail.lstrip() # Paratext puts multiple cross-references on new lines noteLine += noteTail #print( "NoteLine", repr(noteLine) ) self.appendToLastLine( noteLine ) elif element.tag == 'link': # Used to include extra resources BibleOrgSysGlobals.checkXMLNoText( element, location ) 
BibleOrgSysGlobals.checkXMLNoTail( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) # Process the attributes first linkStyle = linkDisplay = linkTarget = None for attrib,value in element.items(): if attrib=='style': linkStyle = value assert linkStyle in ('jmp',) elif attrib=='display': linkDisplay = value # e.g., "click here" elif attrib=='target': linkTarget = value # e.g., some reference else: logging.warning( _("KW54 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) self.addPriorityError( 3, C, V, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) ) elif element.tag == 'unmatched': # Used to denote errors in the source text BibleOrgSysGlobals.checkXMLNoText( element, location ) BibleOrgSysGlobals.checkXMLNoTail( element, location ) BibleOrgSysGlobals.checkXMLNoAttributes( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) self.addPriorityError( 2, C, V, _("Unmatched element in {}").format( location) ) else: logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, location ) ) self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) ) for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] ) if BibleOrgSysGlobals.debugFlag: halt
def ForgeForSwordSearcherBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Look for ForgeForSwordSearcher Bible files in the given folder, and
        failing that, in each of its immediate subfolders.

    Candidates are files ending in '.txt'. With strictCheck (or the global
        strictCheckingFlag) set, the first line (after removing any BOM)
        must also begin with '; TITLE:' followed by whitespace.

    Returns False if the given path is unreadable or is not a folder.

    Otherwise returns None if nothing was found, or the number of Bibles found,
        except that if exactly one was found and autoLoad or autoLoadBooks is true,
        the ForgeForSwordSearcherBible object is returned instead
        (with load() already called on it if autoLoadBooks is true).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("ForgeForSwordSearcherBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
    if BibleOrgSysGlobals.debugFlag:
        assert autoLoad in (True, False,)

    # Reject anything that isn't a readable folder
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("ForgeForSwordSearcherBibleFileCheck: Given {} folder is unreadable")
            .format(repr(givenFolderName)))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("ForgeForSwordSearcherBibleFileCheck: Given {} path is not a folder")
            .format(repr(givenFolderName)))
        return False

    # Sort the folder entries into subfolders and candidate files
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" ForgeForSwordSearcherBibleFileCheck: Looking for files in given {}"
              .format(repr(givenFolderName)))
    foundFolders, foundFiles = [], []
    for entryName in os.listdir(givenFolderName):
        entryPath = os.path.join(givenFolderName, entryName)
        if os.path.isdir(entryPath):
            if entryName != '__MACOSX':  # don't visit these directories
                foundFolders.append(entryName)
        elif os.path.isfile(entryPath):
            upperName = entryName.upper()
            if any(upperName.endswith(ending) for ending in filenameEndingsToIgnore):
                continue
            upperExtension = os.path.splitext(upperName)[1]
            if upperExtension[1:] not in extensionsToIgnore:  # Compare without the first dot
                foundFiles.append(entryName)

    # Count the candidate files in the given folder itself
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    mustCheckContents = strictCheck or BibleOrgSysGlobals.strictCheckingFlag
    for thisFilename in sorted(foundFiles):
        if thisFilename in ('book_names.txt', 'Readme.txt'):
            looksHopeful = True
            continue
        if not thisFilename.endswith('.txt'):
            continue
        if mustCheckContents:
            firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, givenFolderName)
            if firstLine is None:
                continue  # seems we couldn't decode the file
            if firstLine.startswith(chr(65279)):  # U+FEFF or \ufeff
                logging.info(
                    "ForgeForSwordSearcherBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}"
                    .format(thisFilename))
                firstLine = firstLine[1:]  # Remove the Unicode Byte Order Marker (BOM)
            match = re.search('^; TITLE:\\s', firstLine)
            if not match:
                if BibleOrgSysGlobals.verbosityLevel > 3:
                    print(
                        "ForgeForSwordSearcherBibleFileCheck: (unexpected) first line was {!r} in {}"
                        .format(firstLine, thisFilename))
                continue
            if BibleOrgSysGlobals.debugFlag:
                print(
                    "ForgeForSwordSearcherBibleFileCheck First line got {!r} match from {!r}"
                    .format(match.group(0), firstLine))
        lastFilenameFound = thisFilename
        numFound += 1

    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ForgeForSwordSearcherBibleFileCheck got", numFound,
                  givenFolderName, lastFilenameFound)
        if numFound != 1 or not (autoLoad or autoLoadBooks):
            return numFound
        # Remove the end of the actual filename ".txt"
        bible = ForgeForSwordSearcherBible(givenFolderName, lastFilenameFound[:-4])
        if autoLoadBooks:
            bible.load()  # Load and process the file
        return bible
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Looked hopeful but no actual files found")

    # Nothing at the top level, so try each immediate subfolder
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("ForgeForSwordSearcherBibleFileCheck: {!r} subfolder is unreadable")
                .format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" ForgeForSwordSearcherBibleFileCheck: Looking for files in {}"
                  .format(tryFolderName))
        foundSubfolders, foundSubfiles = [], []
        for entryName in os.listdir(tryFolderName):
            entryPath = os.path.join(givenFolderName, thisFolderName, entryName)
            if os.path.isdir(entryPath):
                foundSubfolders.append(entryName)
            elif os.path.isfile(entryPath):
                upperName = entryName.upper()
                if any(upperName.endswith(ending) for ending in filenameEndingsToIgnore):
                    continue
                upperExtension = os.path.splitext(upperName)[1]
                if upperExtension[1:] not in extensionsToIgnore:  # Compare without the first dot
                    foundSubfiles.append(entryName)

        # Count the candidate files in this subfolder
        for thisFilename in sorted(foundSubfiles):
            if not thisFilename.endswith('.txt'):
                continue
            if mustCheckContents:
                firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, tryFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if firstLine.startswith(chr(65279)):  # U+FEFF or \ufeff
                    logging.info(
                        "ForgeForSwordSearcherBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}"
                        .format(thisFilename))
                    firstLine = firstLine[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search('^; TITLE:\\s', firstLine)
                if not match:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print(
                            "ForgeForSwordSearcherBibleFileCheck: (unexpected) first line was {!r} in {}"
                            .format(firstLine, thisFilename))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt  # Deliberate stop (undefined name) when debugging this module
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    print(
                        "ForgeForSwordSearcherBibleFileCheck First line got type {!r} match from {!r}"
                        .format(match.group(0), firstLine))
            foundProjects.append((tryFolderName, thisFilename,))
            lastFilenameFound = thisFilename
            numFound += 1

    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ForgeForSwordSearcherBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound != 1 or not (autoLoad or autoLoadBooks):
            return numFound
        if BibleOrgSysGlobals.debugFlag:
            assert len(foundProjects) == 1
        projectFolder, projectFilename = foundProjects[0]
        # Remove the end of the actual filename ".txt"
        bible = ForgeForSwordSearcherBible(projectFolder, projectFilename[:-4])
        if autoLoadBooks:
            bible.load()  # Load and process the file
        return bible
def validateEntry( self, entry ):
    """
    Check/validate the given Strongs Greek lexicon entry and store the result.

    entry is an XML element whose tag is expected to be "entry" and which
        carries a five-digit "strongs" attribute.

    The child elements are processed in document order:
        strongs, the first greek and the first pronunciation are the
            "essentials" and are saved under their own keys
            ('word', 'pronunciation') in the result dictionary
        see elements are collected into a list under 'see'
        everything else (derivation, definitions, cross-references,
            further greek/pronunciation elements and stray tail text)
            is concatenated into one string saved under 'Entry'.

    The result dictionary is saved in self.StrongsEntries, keyed by the
        text of the <strongs> child element.
    Problems are reported through the logging module; nothing is returned.
    """
    if BibleOrgSysGlobals.debugFlag: assert( entry.tag == "entry" )
    BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
    BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

    # Process the entry attributes first
    strongs5 = None
    for attrib,value in entry.items():
        if attrib == "strongs":
            strongs5 = value
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Validating {} entry...".format( strongs5 ) )
        else: logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
    if BibleOrgSysGlobals.debugFlag: assert( len(strongs5)==5 and strongs5.isdigit() )
    if strongs5 is None:
        # Without the number we can't build locations or store the entry,
        #   so report it rather than failing later on a string concatenation
        logging.error( "Missing 'strongs' attribute in main entry element" )
        return

    entryResults = {}
    entryString = ""
    gettingEssentials = True
    strongs = None # Set from the <strongs> child element (used as the dictionary key)
    for j, element in enumerate( entry ):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag: assert( gettingEssentials and j==0 and element.text )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
            # Entry 2717 and entries 3203..3302 are excluded from the no-tail check.
            #   (The test used to be written as 3203 > n > 3302, which can never
            #    be true, so the check was silently skipped for every entry.)
            if strongs5 != '02717' and not (3203 <= int(strongs5) <= 3302):
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag: assert( strongs5.endswith( strongs ) )
            if element.tail and element.tail.strip(): entryString += element.tail.strip()
        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
            # Process the attributes
            translit = greek = beta = None
            for attrib,value in element.items():
                if attrib=="translit": translit = value
                elif attrib=="unicode": greek = value
                elif attrib=="BETA": beta = value
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag: assert( greek and translit and beta )
            if 'word' not in entryResults: # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag: assert( gettingEssentials and j==1 )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                entryResults['word'] = (greek, translit, beta)
            else: # A later greek element -- it's part of the running text
                if BibleOrgSysGlobals.debugFlag: assert( j > 2 )
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 )
        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            pronunciation = None
            for attrib,value in element.items():
                if attrib=="strongs": pronunciation = value
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if gettingEssentials:
                if BibleOrgSysGlobals.debugFlag:
                    assert( j == 2 )
                    assert( pronunciation )
                    assert( 'pronunciation' not in entryResults )
                entryResults['pronunciation'] = pronunciation
            else:
                if BibleOrgSysGlobals.debugFlag: assert( j>2 and not gettingEssentials )
                if element.tail and element.tail.strip(): entryString += element.tail.strip().replace( '\n', '' )
        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
            derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag: assert( derivation and '\t' not in derivation and '\n' not in derivation )
            entryString += derivation
        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
            definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag: assert( definition and '\t' not in definition and '\n' not in definition )
            entryString += definition
        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag: assert( KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition )
            entryString += KJVdefinition
        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
            strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag: assert( strongsRef and '\t' not in strongsRef and '\n' not in strongsRef )
            # The attributes can come in either order
            strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            entryString += ' ' + strongsRef
        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib,value in element.items():
                if attrib == "language": seeLanguage = value
                elif attrib == "strongs": seeStrongsNumber = value # Note: No leading zeroes here
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert( seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit() )
                assert( seeLanguage in ('GREEK','HEBREW',) )
            entryResults.setdefault( 'see', [] ).append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )
        else: logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

    if entryString:
        if BibleOrgSysGlobals.debugFlag: assert( '\t' not in entryString and '\n' not in entryString )
        # Convert any flattened cross-references into our own compact form
        #   (again the attributes can come in either order)
        entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        if BibleOrgSysGlobals.debugFlag: assert( 'strongsref' not in entryString )
        entryResults['Entry'] = entryString

    if strongs is None:
        # There was no <strongs> child element so we have no key to save this under
        logging.error( "No 'strongs' element found in Strongs entry {}".format( strongs5 ) )
        return
    self.StrongsEntries[strongs] = entryResults
def load(self):
    """
    Load a single VerseView XML source file and process its elements.

    Parses self.sourceFilepath into self.XMLTree, then walks the children
        of the root element:
        the filename, revision, title, font, copyright and sizefactor
            elements are validated (no attributes, subelements or tail)
            and, except for filename and sizefactor, their text is saved
            in self.suppliedMetadata['VerseView']
        each book element is passed to __validateAndExtractBook together
            with its 1-based position among the book elements
        anything else is logged as an error.

    Finishes by calling applySuppliedMetadata( 'VerseView' ) and
        doPostLoadProcessing(). Returns nothing.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading {}…").format(self.sourceFilepath))
    self.XMLTree = ElementTree().parse(self.sourceFilepath)
    if BibleOrgSysGlobals.debugFlag:
        assert len(self.XMLTree)  # Fail here if we didn't load anything at all

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    self.suppliedMetadata['VerseView'] = {}

    # The simple text-only elements: (tag, name for messages, metadata key or None)
    #   They all get exactly the same three checks,
    #   so they're listed here rather than repeated branch by branch.
    simpleElements = (
        (VerseViewXMLBible.filenameTag, "filename", None),
        (VerseViewXMLBible.revisionTag, "revision", 'Revision'),
        (VerseViewXMLBible.titleTag, "title", 'Title'),
        (VerseViewXMLBible.fontTag, "font", 'Font'),
        (VerseViewXMLBible.copyrightTag, "copyright", 'Copyright'),
        (VerseViewXMLBible.sizefactorTag, "sizefactor", None),
    )

    # Find the main (bible) container
    if self.XMLTree.tag == VerseViewXMLBible.treeTag:
        location = "VerseView XML file"
        BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
        BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
        BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

        # Find the submain (various info and then book) containers
        bookNumber = 0
        for element in self.XMLTree:
            # First matching row wins (same precedence as a chain of elif's)
            simpleMatch = next(
                (row for row in simpleElements if row[0] == element.tag), None)
            if simpleMatch is not None:
                matchedTag, elementName, metadataKey = simpleMatch
                sublocation = elementName + " in " + location
                BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
                BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
                if metadataKey is not None:
                    self.suppliedMetadata['VerseView'][metadataKey] = element.text
                elif elementName == "sizefactor":
                    # We don't store this one -- only '1' is expected
                    if BibleOrgSysGlobals.debugFlag:
                        assert element.text == '1'
            elif element.tag == VerseViewXMLBible.bookTag:
                sublocation = "book in " + location
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                bookNumber += 1
                self.__validateAndExtractBook(element, bookNumber)
            else:
                logging.error("xk15 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.bookTag, element.tag))
    else:
        logging.error("Expected to load {!r} but got {!r}".format(
            VerseViewXMLBible.treeTag, self.XMLTree.tag))

    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
        # These are all compulsory so they should all exist,
        #   but use get() so that a defective file (already reported above)
        #   displays None here rather than aborting the load with a KeyError
        loadedMetadata = self.suppliedMetadata['VerseView']
        print("Revision is {!r}".format(loadedMetadata.get('Revision')))
        print("Title is {!r}".format(loadedMetadata.get('Title')))
        print("Font is {!r}".format(loadedMetadata.get('Font')))
        print("Copyright is {!r}".format(loadedMetadata.get('Copyright')))

    self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict
    self.doPostLoadProcessing()
def __validateAndExtractHeader( self ):
    """
    Extracts information out of the header record, such as:
        <INFORMATION>
        <title>King James Version</title>
        <creator></creator>
        <subject>The Holy Bible</subject>
        <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description>
        <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
        <contributors />
        <date>2009-01-23</date>
        <type>Bible</type>
        <format>Haggai XML Bible Markup Language</format>
        <identifier>kjv</identifier>
        <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
        <language>ENG</language>
        <coverage>provide the Bible to the nations of the world</coverage>
        <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

    Each child of self.header is checked for having no tail, attributes or
        subelements, and its text is then saved as an attribute of self:
        title, description, date, identifier, source and language are
            always saved (and must be non-empty when the debug flag is set)
        creator, subject, publisher, contributors, type (saved as
            self.documentType), coverage and rights are only saved
            when they have text
        contributor may be repeated: the first is saved as a string in
            self.contributor and any further ones turn it into a flat list
        format is only checked, not saved.
    Unexpected tags are logged as errors. Returns nothing.
    """
    if BibleOrgSysGlobals.debugFlag: assert self.header
    location = 'Header'
    BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' )
    BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' )
    BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' )

    # Maps of element tag -> name of the attribute that the text gets saved in
    compulsoryFields = { 'title':'title', 'description':'description', 'date':'date',
                        'identifier':'identifier', 'source':'source', 'language':'language' }
    optionalFields = { 'creator':'creator', 'subject':'subject', 'publisher':'publisher',
                        'contributors':'contributors', 'type':'documentType',
                        'coverage':'coverage', 'rights':'rights' }

    def checkSimpleElement( element, codes=('al1d','j3jd','5g78') ):
        """ Do the three checks that every header subelement gets. """
        sublocation = "{} in {}".format( element.tag, location )
        BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, codes[0] )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, codes[1] )
        BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, codes[2] )

    # TODO: We probably need to rationalise some of the self.xxx stores
    for element in self.header:
        tag = element.tag
        if tag in compulsoryFields:
            checkSimpleElement( element )
            if BibleOrgSysGlobals.debugFlag: assert element.text
            setattr( self, compulsoryFields[tag], element.text )
        elif tag in optionalFields:
            checkSimpleElement( element )
            if element.text: setattr( self, optionalFields[tag], element.text )
        elif tag == 'contributor':
            checkSimpleElement( element, ('alj1d','j3jjd','5gk78') )
            if element.text:
                try: existingContributor = self.contributor
                except AttributeError: # Must be the first (and possibly only) one
                    self.contributor = element.text
                else: # Put multiples into a list
                    if isinstance( existingContributor, list ):
                        # Extend the list that we already started
                        #   (rebuilding it each time gave nested lists like [[a,b],c])
                        existingContributor.append( element.text )
                    else: self.contributor = [ existingContributor, element.text ]
        elif tag == 'format':
            checkSimpleElement( element )
            if BibleOrgSysGlobals.debugFlag: assert element.text
            if BibleOrgSysGlobals.debugFlag: assert element.text == 'Haggai XML Bible Markup Language'
        else: logging.error( "Found unexpected {!r} tag in {}".format( tag, location ) )
def getChangeLogFilepath(loggingFolder, projectName):
    """
    Return the filepath of the change log for the given project.

    The filename is the project name with each space replaced by an
        underscore, followed by '_ChangeLog.txt', passed through
        BibleOrgSysGlobals.makeSafeFilename (presumably to replace any
        characters that are unsuitable for filenames -- see that function).
    The result is that filename joined onto loggingFolder.
    """
    underscoredName = projectName.replace(' ', '_')
    safeFilename = BibleOrgSysGlobals.makeSafeFilename(
        underscoredName + '_ChangeLog.txt')
    return os.path.join(loggingFolder, safeFilename)
def YETBibleFileCheck(givenFolderName,
                      strictCheck=True,
                      autoLoad=False,
                      autoLoadBooks=False):
    """
    Given a folder, search for YET Bible files or folders in the folder
        and in the next level down.

    givenFolderName is the folder to search.
    strictCheck (or the global strict checking flag) makes us peek at the
        first line of each candidate file, which must start with "info"
        followed by a tab (undecodable files are skipped).
    autoLoad / autoLoadBooks: if either is set and exactly one YET Bible
        is found, a YETBible object is created and returned
        (and only if autoLoadBooks is set is its load() function called).

    Returns False if the given folder is unreadable or is not a folder.
    Otherwise returns the YETBible object (see above)
        or else the number of YET Bibles found (which might be zero).

    Files found in the given folder itself take priority:
        subfolders are only searched if nothing was found there.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("YETBibleFileCheck( {}, {}, {} )".format(givenFolderName,
                                                       strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert (givenFolderName and isinstance(givenFolderName, str))
    if BibleOrgSysGlobals.debugFlag:
        assert (autoLoad in (True, False,))

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" YETBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath):
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            # The accept list is compared against the UPPERCASED extension
            somethingUpperExt = os.path.splitext(something.upper())[1]
            if somethingUpperExt in filenameEndingsToAccept:
                foundFiles.append(something)
    if '__MACOSX' in foundFolders:
        foundFolders.remove('__MACOSX')  # don't visit these directories

    # See if there's an YETBible project here in this given folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename.endswith('.yet'):
            if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                firstLine = BibleOrgSysGlobals.peekIntoFile(
                    thisFilename, givenFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if not firstLine.startswith("info\t"):
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print("YETBible (unexpected) first line was {!r} in {}"
                              .format(firstLine, thisFilename))
                    continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("YETBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = YETBible(givenFolderName, lastFilenameFound[:-4]
                          )  # Remove the end of the actual filename ".yet"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("YETBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" YETBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        foundSubfiles = []
        for something in os.listdir(tryFolderName):
            somepath = os.path.join(givenFolderName, thisFolderName, something)
            if os.path.isfile(somepath):  # (We don't go any deeper into folders)
                somethingUpperExt = os.path.splitext(something.upper())[1]
                if somethingUpperExt in filenameEndingsToAccept:
                    foundSubfiles.append(something)

        # See if there's an YETBible project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if thisFilename.endswith('.yet'):
                if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                    firstLine = BibleOrgSysGlobals.peekIntoFile(
                        thisFilename, tryFolderName)
                    if firstLine is None:
                        continue  # seems we couldn't decode the file
                    if not firstLine.startswith("info\t"):
                        # Just skip the file (like we do for the given folder above)
                        #   rather than aborting the whole search
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print(
                                "YETBible (unexpected) first line was {!r} in {}"
                                .format(firstLine, thisFilename))
                        continue
                foundProjects.append((tryFolderName, thisFilename,))
                lastFilenameFound = thisFilename
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("YETBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag:
                assert (len(foundProjects) == 1)
            uB = YETBible(foundProjects[0][0], foundProjects[0][1][:-4]
                          )  # Remove the end of the actual filename ".yet"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
def _validate(self):
    """
    Check/validate the loaded data.

    Goes through each child element of self._XMLtree (which must already
        be loaded and have at least one child) and, for every element
        with the main element tag, checks that:
        it has no text, tail or subelements
        each compulsory attribute is present (else an error is logged)
            and is non-blank (else a warning, except for "type")
        no optional attribute is blank (warning)
        it has no other attributes (warning)
        the value of each "unique" attribute hasn't already been seen
            in that same attribute of an earlier record (error)
            -- except that "reference_name" is exempted.
    Any other element is reported with a warning.

    All problems are reported through the logging module
        (with j being the zero-based record index); nothing is returned.
    """
    # Explicit tests here (rather than testing the truth value of the element)
    assert self._XMLtree is not None and len(self._XMLtree)

    # One set of already-seen values for each attribute that must be unique
    #   (sets so that each lookup is fast even when there's thousands of records)
    seenValues = {
        attributeName: set()
        for attributeName in self._uniqueAttributes
    }

    for j, element in enumerate(self._XMLtree):
        if element.tag != self._mainElementTag:
            logging.warning("Unexpected element: {} in record {}".format(
                element.tag, j))
            continue

        BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
        BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

        # Check compulsory attributes on this main element
        for attributeName in self._compulsoryAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is None:
                logging.error(
                    "Compulsory {!r} attribute is missing from {} element in record {}"
                    .format(attributeName, element.tag, j))
            elif not attributeValue and attributeName != "type":
                # (elif so that a missing attribute isn't ALSO reported as blank)
                logging.warning(
                    "Compulsory {!r} attribute is blank on {} element in record {}"
                    .format(attributeName, element.tag, j))

        # Check optional attributes on this main element
        for attributeName in self._optionalAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and not attributeValue:
                logging.warning(
                    "Optional {!r} attribute is blank on {} element in record {}"
                    .format(attributeName, element.tag, j))

        # Check for unexpected additional attributes on this main element
        for attributeName, attributeValue in element.items():
            if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                logging.warning(
                    "Additional {!r} attribute ({!r}) found on {} element in record {}"
                    .format(attributeName, attributeValue, element.tag, j))

        # Check the attributes that must contain unique information
        #   (in that particular field -- doesn't check across different attributes)
        for attributeName in self._uniqueAttributes:
            attributeValue = element.get(attributeName)
            if attributeValue is not None and attributeName != "reference_name":
                if attributeValue in seenValues[attributeName]:
                    logging.error(
                        "Found {!r} data repeated in {!r} field on {} element in record {}"
                        .format(attributeValue, attributeName, element.tag, j))
                seenValues[attributeName].add(attributeValue)
def UnboundBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Given a folder, search for Unbound Bible files or folders in the folder
        and in the next level down.

    givenFolderName is the folder to search.
    strictCheck (or the global strict checking flag) makes us peek at the
        first line of each candidate "..._utf8.txt" file, which must be
        the Unbound Bible header line (undecodable files are skipped).
    autoLoad / autoLoadBooks: if either is set and exactly one Unbound
        Bible is found, an UnboundBible object is created and returned
        (and only if autoLoadBooks is set is its load() function called).

    Returns False if the given folder is unreadable or is not a folder.
    Otherwise returns the UnboundBible object (see above)
        or else the number of Unbound Bibles found (which might be zero).

    Files found in the given folder itself take priority:
        subfolders are only searched if nothing was found there.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("UnboundBibleFileCheck( {}, {}, {}, {} )".format(givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag: assert givenFolderName and isinstance(givenFolderName, str)
    if BibleOrgSysGlobals.debugFlag: assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(_("UnboundBibleFileCheck: Given {!r} folder is unreadable").format(givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(_("UnboundBibleFileCheck: Given {!r} path is not a folder").format(givenFolderName))
        return False

    def isWantedFilename(filename):
        """ Return False if the (uppercased) filename has an ending or an extension that we ignore. """
        filenameUpper = filename.upper()
        if any(filenameUpper.endswith(ending) for ending in filenameEndingsToIgnore):
            return False
        uppercaseExtension = os.path.splitext(filenameUpper)[1]
        return uppercaseExtension[1:] not in extensionsToIgnore  # Compare without the first dot

    expectedFirstLine = "#THE UNBOUND BIBLE (www.unboundbible.org)"

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" UnboundBibleFileCheck: Looking for files in given {}".format(givenFolderName))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath):
            if something == "__MACOSX":
                continue  # don't visit these directories
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            if isWantedFilename(something):
                foundFiles.append(something)

    # See if there's an UnboundBible project here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename in ("book_names.txt", "Readme.txt"):
            looksHopeful = True
        elif thisFilename.endswith("_utf8.txt"):
            if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, givenFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if firstLine != expectedFirstLine:
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print("UB (unexpected) first line was {!r} in {}".format(firstLine, thisFilename))
                    continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck got", numFound, givenFolderName, lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = UnboundBible(givenFolderName, lastFilenameFound[:-9])  # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print(" Looked hopeful but no actual files found")

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + "/")
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(_("UnboundBibleFileCheck: {!r} subfolder is unreadable").format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(" UnboundBibleFileCheck: Looking for files in {}".format(tryFolderName))
        foundSubfiles = []
        for something in os.listdir(tryFolderName):
            somepath = os.path.join(givenFolderName, thisFolderName, something)
            if os.path.isfile(somepath) and isWantedFilename(something):  # (We don't go any deeper into folders)
                foundSubfiles.append(something)

        # See if there's an UB project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if thisFilename.endswith("_utf8.txt"):
                if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                    firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, tryFolderName)
                    if firstLine is None:
                        continue  # seems we couldn't decode the file
                    if firstLine != expectedFirstLine:
                        # Just skip the file (like we do for the given folder above)
                        #   rather than aborting the whole search
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print("UB (unexpected) first line was {!r} in {}".format(firstLine, thisFilename))
                        continue
                foundProjects.append((tryFolderName, thisFilename))
                lastFilenameFound = thisFilename
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            uB = UnboundBible(foundProjects[0][0], foundProjects[0][1][:-9])  # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
def demo():
    """
    Demonstrate reading and checking some Bible databases.

    The work is split into three sections, each switched on or off with a
    literal ``if 1:`` / ``if 0:`` toggle:
      * run BCVBibleFileCheck on the export folder three ways,
      * try every subfolder of the test folder (optionally in parallel),
      * load a fixed list of named test versions.
    """
    if BibleOrgSysGlobals.verbosityLevel > 0:
        print(ProgNameVersion)

    testFolder = "OutputFiles/BOS_BCV_Export/"

    if 1:  # demo the file checking code -- first with the whole folder and then with only one folder
        fileCheckRuns = (
            # announcement, result label, extra keyword arguments, strict check?, export?
            ("\nBCV TestA1", "BCV TestA1", {}, False, False),
            ("\nBCV TestA2", "BCV TestA2", {'autoLoad': True}, True, False),  # But doesn't preload books
            ("\nBCV TestA3", "BCV TestA3", {'autoLoad': True, 'autoLoadBooks': True}, True, True),
        )
        for announcement, resultLabel, checkKwargs, wantStrictCheck, allowExport in fileCheckRuns:
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(announcement)
            checkResult = BCVBibleFileCheck(testFolder, **checkKwargs)
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print(resultLabel, checkResult)
            if wantStrictCheck and BibleOrgSysGlobals.strictCheckingFlag:
                checkResult.check()
                bibleErrors = checkResult.getErrors()
            if allowExport and BibleOrgSysGlobals.commandLineArguments.export:
                checkResult.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)

    if 0:  # all discovered modules in the test folder
        discoveredFolders, discoveredFiles = [], []
        for entryName in os.listdir(testFolder):
            entryPath = os.path.join(testFolder, entryName)
            if os.path.isdir(entryPath):
                discoveredFolders.append(entryName)
            elif os.path.isfile(entryPath):
                discoveredFiles.append(entryName)

        if BibleOrgSysGlobals.maxProcesses > 1:  # Get our subprocesses ready and waiting for work
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("\nTrying all {} discovered modules…".format(len(discoveredFolders)))
            parameters = sorted(discoveredFolders)
            BibleOrgSysGlobals.alreadyMultiprocessing = True
            with multiprocessing.Pool(processes=BibleOrgSysGlobals.maxProcesses) as pool:  # start worker processes
                results = pool.map(testBCV, parameters)  # have the pool do our loads
                assert len(results) == len(parameters)  # Results (all None) are actually irrelevant to us here
            BibleOrgSysGlobals.alreadyMultiprocessing = False
        else:  # Just single threaded
            for folderIndex, folderName in enumerate(sorted(discoveredFolders)):
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print("\nBCV D{}/ Trying {}".format(folderIndex + 1, folderName))
                testBCV(folderName)

    if 0:  # Load and process some of our test versions
        testVersions = (
            ("Matigsalug", 'utf-8', "Tests/DataFilesForTests/BCVTest1/"),
            ("Matigsalug", 'utf-8', "Tests/DataFilesForTests/BCVTest2/"),
            ("Exported", 'utf-8', "Tests/BOS_BCV_Export/"),
        )
        for count, (name, encoding, versionFolder) in enumerate(testVersions, start=1):
            if not os.access(versionFolder, os.R_OK):
                print("\nSorry, test folder {!r} is not readable on this computer.".format(versionFolder))
                continue

            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("\nBCV A{}/".format(count))
            bcvB = BCVBible(versionFolder, name, encoding=encoding)
            bcvB.load()
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Gen assumed book name:", repr(bcvB.getAssumedBookName('GEN')))
                print("Gen long TOC book name:", repr(bcvB.getLongTOCName('GEN')))
                print("Gen short TOC book name:", repr(bcvB.getShortTOCName('GEN')))
                print("Gen book abbreviation:", repr(bcvB.getBooknameAbbreviation('GEN')))
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(bcvB)
            if BibleOrgSysGlobals.strictCheckingFlag:
                bcvB.check()
                bcbibleErrors = bcvB.getErrors()
            if BibleOrgSysGlobals.commandLineArguments.export:
                bcvB.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)
                pickleFilename = BibleOrgSysGlobals.makeSafeFilename(name) + '.pickle'
                pickleFolder = os.path.join("OutputFiles/", "BOS_Bible_Object_Pickle/")
                newObj = BibleOrgSysGlobals.unpickleObject(pickleFilename, pickleFolder)
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print("newObj is", newObj)
def __load(self, XMLFilepath):
    """
    Load the source XML file and remove the header from the tree.
    Also, extracts some useful elements from the header element.

    XMLFilepath is the path of the XML file to parse; it is remembered on
    the instance. The parsed root is stored in self._XMLtree. When the first
    child is the expected header element it is detached from the tree and
    kept as self.XMLheader, and the version/date/title texts of its single
    "work" child are copied to self.ProgVersion, self.dateString and
    self.titleString.

    Problems with the file structure are logged rather than raised; the
    assertions only guard against an empty path, a second load, and an
    empty parse result.
    """
    assert XMLFilepath
    self.__XMLFilepath = XMLFilepath
    assert self._XMLtree is None or len(self._XMLtree) == 0  # Make sure we're not doing this twice

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("Loading BibleReferencesLinks XML file from {!r}...").format(self.__XMLFilepath))
    self._XMLtree = ElementTree().parse(self.__XMLFilepath)
    assert self._XMLtree  # Fail here if we didn't load anything at all

    if self._XMLtree.tag == self._treeTag:
        header = self._XMLtree[0]
        if header.tag == self._headerTag:
            self.XMLheader = header
            self._XMLtree.remove(header)
            BibleOrgSysGlobals.checkXMLNoText(header, "header")
            BibleOrgSysGlobals.checkXMLNoTail(header, "header")
            BibleOrgSysGlobals.checkXMLNoAttributes(header, "header")
            if len(header) > 1:
                logging.info(_("Unexpected elements in header"))
            elif len(header) == 0:
                logging.info(_("Missing work element in header"))
            else:
                work = header[0]
                BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                BibleOrgSysGlobals.checkXMLNoAttributes(work, "work in header")
                if work.tag == "work":
                    self.ProgVersion = work.find("version").text
                    self.dateString = work.find("date").text
                    self.titleString = work.find("title").text
                else:
                    logging.warning(_("Missing work element in header"))
        else:
            # Translate the template first, then fill it in, so the lookup key
            # is the constant message rather than the already-formatted text.
            logging.warning(_("Missing header element (looking for {!r} tag)").format(self._headerTag))
        if header.tail is not None and header.tail.strip():
            # Report the tail of the element that was just tested (the header).
            logging.error(_("Unexpected {!r} tail data after header").format(header.tail))
    else:
        logging.error(_("Expected to load {!r} but got {!r}").format(self._treeTag, self._XMLtree.tag))
def validateEntry( self, entry ):
    """
    Check/validate the given Strongs Greek lexicon entry.

    entry is an XML element with tag "entry" and a five-digit "strongs"
    attribute. Its child elements are walked in order:
      * the first "greek" element is saved as entryResults['word']
        (a tuple of unicode, transliteration and BETA values),
      * a "pronunciation" seen while still in the leading part of the
        entry is saved as entryResults['pronunciation'],
      * "see" elements are collected in entryResults['see'] as strings
        like 'G123' or 'H123',
      * everything else is flattened into a single text string which is
        saved as entryResults['Entry'] after its strongsref markup has
        been rewritten to <StrongsRef>…</StrongsRef>.

    The result is stored in self.StrongsEntries, keyed by the text of the
    entry's "strongs" child element. Nothing is returned.
    """
    if BibleOrgSysGlobals.debugFlag: assert entry.tag == "entry"

    BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
    BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

    # Process the entry attributes first
    strongs5 = None
    for attrib,value in entry.items():
        if attrib == "strongs":
            strongs5 = value
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Validating {} entry…".format( strongs5 ) )
        else: logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
    if BibleOrgSysGlobals.debugFlag: assert len(strongs5)==5 and strongs5.isdigit()

    entryResults = {}
    entryString = ""
    gettingEssentials = True # Stays True until a second "greek" element is encountered
    for j, element in enumerate( entry ):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag: assert gettingEssentials and j==0 and element.text
            BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
            # NOTE(review): the chained comparison 3203 > n > 3302 can never be true,
            #   so this tail check never runs. The intended range test needs to be
            #   confirmed before the condition is changed.
            if strongs5!='02717' and (3203 > int(strongs5) > 3302):
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag: assert strongs5.endswith( strongs )
            if element.tail and element.tail.strip(): entryString += element.tail.strip()
        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
            # Process the attributes
            translit = greek = beta = None
            for attrib,value in element.items():
                if attrib=="translit": translit = value
                elif attrib=="unicode": greek = value
                elif attrib=="BETA": beta = value
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag: assert greek and translit and beta
            if 'word' not in entryResults: # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag: assert gettingEssentials and j==1
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                entryResults['word'] = (greek, translit, beta)
            else: # A later greek element just becomes part of the entry text
                if BibleOrgSysGlobals.debugFlag: assert j > 2
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 )
        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            pronunciation = None
            for attrib,value in element.items():
                if attrib=="strongs": pronunciation = value
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if gettingEssentials:
                if BibleOrgSysGlobals.debugFlag:
                    assert j == 2
                    assert pronunciation
                    assert 'pronunciation' not in entryResults
                entryResults['pronunciation'] = pronunciation
            else:
                if BibleOrgSysGlobals.debugFlag: assert j>2 and not gettingEssentials
                if element.tail and element.tail.strip(): entryString += element.tail.strip().replace( '\n', '' )
        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
            derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert derivation and '\t' not in derivation and '\n' not in derivation
            entryString += derivation
        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
            definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert definition and '\t' not in definition and '\n' not in definition
            entryString += definition
        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
            entryString += KJVdefinition
        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
            strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
            # Raw strings keep \d as a regex escape instead of an invalid string escape
            strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            entryString += ' ' + strongsRef
        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib,value in element.items():
                if attrib == "language": seeLanguage = value
                elif attrib == "strongs": seeStrongsNumber = value # Note: No leading zeroes here
                else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                assert seeLanguage in ('GREEK','HEBREW',)
            if 'see' not in entryResults: entryResults['see'] = []
            entryResults['see'].append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )
        else: logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

    if entryString:
        if BibleOrgSysGlobals.debugFlag:
            assert '\t' not in entryString and '\n' not in entryString
        # Rewrite the flattened (empty) strongsref elements into our own compact markup
        entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        if BibleOrgSysGlobals.debugFlag: assert 'strongsref' not in entryString
        entryResults['Entry'] = entryString
    self.StrongsEntries[strongs] = entryResults
def importDataToPython(self):
    """
    Loads (and pivots) the data (not including the header) into suitable Python containers
        to use in a Python program.
    (Of course, you can just use the elementTree in self._XMLtree if you prefer.)

    Returns a 2-tuple (self.__DataList, self.__DataDict):
      * the list holds one 4-tuple per source entry:
            (sourceReference, sourceComponent, parsedSourceReference, actualLinksList)
        where actualLinksList holds 4-tuples
            (targetReference, targetComponent, parsedTargetReference, linkType)
      * the dictionary maps each verse key yielded by getIncludedVerses() to a list of
        such entries, with both the forward links and the reversed links filed there.

    If the list has already been built, the cached pair is returned immediately.
    """
    assert self._XMLtree
    if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
        return self.__DataList, self.__DataDict

    # Pass 1: pull the raw text fields out of the XML tree
    rawRefLinkList = []
    actualLinkCount = 0
    for element in self._XMLtree:
        # Get these first for helpful error messages
        sourceReference = element.find('sourceReference').text
        sourceComponent = element.find('sourceComponent').text
        assert sourceComponent in ('Section', 'Verses', 'Verse',)

        BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference, 'kd21')
        BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

        actualRawLinksList = []
        for subelement in element:
            if subelement.tag in ('sourceReference', 'sourceComponent',):  # already processed these
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'ls12')
                BibleOrgSysGlobals.checkXMLNoSubelements(subelement, sourceReference, 'ks02')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'sqw1')
            elif subelement.tag == 'BibleReferenceLink':
                BibleOrgSysGlobals.checkXMLNoText(subelement, sourceReference, 'haw9')
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'hs19')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'jsd9')
                targetReference = subelement.find('targetReference').text
                targetComponent = subelement.find('targetComponent').text
                assert targetComponent in ('Section', 'Verses', 'Verse',)
                linkType = subelement.find('linkType').text
                assert linkType in ('TSK', 'QuotedOTReference', 'AlludedOTReference', 'PossibleOTReference',)
                actualRawLinksList.append((targetReference, targetComponent, linkType,))
                actualLinkCount += 1

        rawRefLinkList.append((sourceReference, sourceComponent, actualRawLinksList,))

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print(" {} raw links loaded (with {} actual raw link entries)".format(
            len(rawRefLinkList), actualLinkCount))

    # Pass 2: parse every reference string into a key object
    myRefLinkList = []
    actualLinkCount = 0
    # NOTE(review): BOS is not used below; the constructor call is kept in case it has side effects.
    BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

    for j, (sourceReference, sourceComponent, actualRawLinksList) in enumerate(rawRefLinkList):
        # Just do some testing first
        if sourceComponent == 'Verse':
            SimpleVerseKey(sourceReference)
        else:
            flag = False
            try:
                SimpleVerseKey(sourceReference, ignoreParseErrors=True)
                flag = True
            except TypeError:
                pass  # This should happen coz it should fail the SVK
            if flag:  # A non-'Verse' component parsed as a single verse
                logging.error("{} {!r} failed!".format(sourceComponent, sourceReference))
                raise TypeError

        # Now do the actual parsing
        parsedSourceReference = FlexibleVersesKey(sourceReference)
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(j, sourceComponent, sourceReference, parsedSourceReference)

        actualLinksList = []
        for targetReference, targetComponent, linkType in actualRawLinksList:
            # Just do some testing first
            if targetComponent == 'Verse':
                SimpleVerseKey(targetReference)
            else:
                flag = False
                try:
                    SimpleVerseKey(targetReference, ignoreParseErrors=True)
                    flag = True
                except TypeError:
                    pass  # This should happen coz it should fail the SVK
                if flag:
                    logging.error("{} {!r} failed!".format(targetComponent, targetReference))
                    raise TypeError

            # Now do the actual parsing
            try:
                parsedTargetReference = FlexibleVersesKey(targetReference)
            except TypeError:
                print(" Temporarily ignored {!r} (TypeError from FlexibleVersesKey)".format(targetReference))
                parsedTargetReference = None
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print(' ', targetComponent, targetReference, parsedTargetReference)

            actualLinksList.append((targetReference, targetComponent, parsedTargetReference, linkType,))
            actualLinkCount += 1

        myRefLinkList.append((sourceReference, sourceComponent, parsedSourceReference, actualLinksList,))

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print(" {} links processed (with {} actual link entries)".format(
            len(rawRefLinkList), actualLinkCount))
    self.__DataList = myRefLinkList

    # Now put it into my dictionaries for easy access
    # This part should be customized or added to for however you need to process the data

    # Create a link dictionary (by verse key)
    myRefLinkDict = {}
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
        for verseRef in parsedSourceReference.getIncludedVerses():
            assert isinstance(verseRef, SimpleVerseKey)
            myRefLinkDict.setdefault(verseRef, []).append(
                (sourceReference, sourceComponent, parsedSourceReference, actualLinksList,))
    originalLinks = len(myRefLinkDict)
    print(" {} verse links added to dictionary (includes filling out spans)".format(originalLinks))

    # Create a reversed link dictionary (by verse key)
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
        for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
            if parsedTargetReference is not None:
                for verseRef in parsedTargetReference.getIncludedVerses():
                    assert isinstance(verseRef, SimpleVerseKey)
                    if linkType == 'TSK': reverseLinkType = 'TSKQuoted'
                    elif linkType == 'QuotedOTReference': reverseLinkType = 'OTReferenceQuoted'
                    elif linkType == 'AlludedOTReference': reverseLinkType = 'OTReferenceAlluded'
                    elif linkType == 'PossibleOTReference': reverseLinkType = 'OTReferencePossible'
                    else:
                        # NOTE(review): `halt` is not defined in this function, so reaching this
                        #   line raises NameError -- presumably a deliberate stop marker.
                        halt  # Have a new linkType!
                    myRefLinkDict.setdefault(verseRef, []).append(
                        (targetReference, targetComponent, parsedTargetReference,
                         [(sourceReference, sourceComponent, parsedSourceReference, reverseLinkType)]))
    totalLinks = len(myRefLinkDict)
    reverseLinks = totalLinks - originalLinks
    print(" {} reverse links added to dictionary to give {} total".format(reverseLinks, totalLinks))
    self.__DataDict = myRefLinkDict

    # Let's find the most number of references for a verse
    mostReferences = totalReferences = 0
    mostVerseRef = None  # Stays None when the dictionary is empty
    for verseRef, entryList in self.__DataDict.items():
        numRefs = len(entryList)
        if numRefs > mostReferences:
            mostReferences, mostVerseRef = numRefs, verseRef
        totalReferences += numRefs
    if mostVerseRef is not None:  # Nothing to report (and nothing to call getShortText on) otherwise
        print(" {} maximum links for any one reference ({})".format(
            mostReferences, mostVerseRef.getShortText()))
    print(" {} total links for all references".format(totalReferences))

    return self.__DataList, self.__DataDict
def __validateSystem(self, bookOrderTree, systemName):
    """
    Do a semi-automatic check of the XML file validity.

    bookOrderTree is the parsed XML element whose children are the records
    to check; systemName is only used in log messages.

    For every child with the expected main tag this checks the ascending
    "id" attribute, the compulsory/optional/unexpected attributes and
    subelements, and the fields that must hold unique values. Children
    with any other tag are reported as unexpected.

    Problems are reported through the logging module; nothing is returned.
    """
    assert bookOrderTree

    # One list of already-seen values per field that must be unique
    uniqueDict = {}
    for elementName in self.uniqueElements:
        uniqueDict["Element_" + elementName] = []
    for attributeName in self.uniqueAttributes:
        uniqueDict["Attribute_" + attributeName] = []

    expectedID = 1
    for k, element in enumerate(bookOrderTree):
        if element.tag == self.mainElementTag:
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check ascending ID field
            ID = element.get("id")
            intID = int(ID)
            if intID != expectedID:
                logging.error(
                    _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}")
                    .format(k, intID, expectedID, systemName))
            expectedID += 1

            # Check that this is unique
            if element.text:
                if element.text in uniqueDict:
                    logging.error(
                        _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {}) for {}")
                        .format(element.text, element.tag, ID, k, systemName))
                uniqueDict[element.text] = None

            # Check compulsory attributes on this main element
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}")
                        .format(attributeName, element.tag, k))
                elif not attributeValue:  # Present but empty (a missing one was reported above)
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}")
                        .format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}")
                            .format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get(attributeName)
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}")
                        .format(attributeName, attributeValue, element.tag, k))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if attributeValue in uniqueDict["Attribute_" + attributeName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}")
                            .format(attributeValue, attributeName, element.tag, k))
                    uniqueDict["Attribute_" + attributeName].append(attributeValue)

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})")
                        .format(elementName, ID, k))
                elif not foundElement.text:  # Only look at the text when the element exists
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})")
                        .format(elementName, ID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    if not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with ID {!r} (record {})")
                            .format(elementName, ID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})")
                        .format(subelement.tag, subelement.text, ID, k))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    if text in uniqueDict["Element_" + elementName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})")
                            .format(text, elementName, ID, k))
                    uniqueDict["Element_" + elementName].append(text)
        else:
            logging.warning(_("Unexpected element: {} in record {}").format(element.tag, k))
def getChangeLogFilepath( loggingFolder, projectName ):
    """
    Return the filepath of the change log for the given project.

    Spaces in projectName are replaced by underscores and '_ChangeLog.txt'
        is appended. That name is passed through BibleOrgSysGlobals.makeSafeFilename
        and the result is joined onto loggingFolder.
    """
    underscoredName = projectName.replace( ' ', '_' )
    safeFilename = BibleOrgSysGlobals.makeSafeFilename( underscoredName + '_ChangeLog.txt' )
    return os.path.join( loggingFolder, safeFilename )
def _validateSystem(self, punctuationTree, systemName):
    """
    Do a semi-automatic check of the XML file validity.

    punctuationTree is the parsed XML element whose children are the
    records to check. systemName is accepted but not used in this function.

    For every child whose tag is one of self.mainElementTags this checks
    the compulsory/optional/unexpected attributes and subelements, and the
    fields that must hold unique values. Children with any other tag are
    reported as unexpected.

    Problems are reported through the logging module; nothing is returned.
    """
    assert punctuationTree

    # One list of already-seen values per field that must be unique
    uniqueDict = {}
    for elementName in self.uniqueElements:
        uniqueDict["Element_" + elementName] = []
    for attributeName in self.uniqueAttributes:
        uniqueDict["Attribute_" + attributeName] = []

    for k, element in enumerate(punctuationTree):
        if element.tag in self.mainElementTags:
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # The messages below quote the record ID; read it here so the name
            #   is always bound (it is None when the element has no "id" attribute)
            ID = element.get("id")

            # Check compulsory attributes on this main element
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}")
                        .format(attributeName, element.tag, k))
                elif not attributeValue:  # Present but empty (a missing one was reported above)
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}")
                        .format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}")
                            .format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get(attributeName)
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}")
                        .format(attributeName, attributeValue, element.tag, k))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    if attributeValue in uniqueDict["Attribute_" + attributeName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}")
                            .format(attributeValue, attributeName, element.tag, k))
                    uniqueDict["Attribute_" + attributeName].append(attributeValue)

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})")
                        .format(elementName, ID, k))
                elif not foundElement.text:  # Only look at the text when the element exists
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})")
                        .format(elementName, ID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    if not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with ID {!r} (record {})")
                            .format(elementName, ID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})")
                        .format(subelement.tag, subelement.text, ID, k))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    if text in uniqueDict["Element_" + elementName]:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})")
                            .format(text, elementName, ID, k))
                    uniqueDict["Element_" + elementName].append(text)
        else:
            logging.warning(_("Unexpected element: {} in record {}").format(element.tag, k))
def demo():
    """
    Demonstrate reading and checking some Bible databases.

    The first section (enabled) loads each ESFM Bible in a hard-coded list of
    test folders, prints some book-name information for Genesis, optionally
    runs the checks (strictCheckingFlag) and the exports (command-line export
    option), and after exporting reloads and prints the pickled Bible object.

    The second section (disabled with "if 0") does the same for every
    subfolder of a test base folder, taking each project's title from its
    copyright.htm file when there is one.

    Takes no parameters and returns nothing; all output goes to stdout and
    the logging module.
    """
    if BibleOrgSysGlobals.verbosityLevel > 0:
        print(ProgNameVersion)

    if 1:  # Load and process some of our test versions
        count = 0
        for name, abbreviation, testFolder in (  # name, abbreviation, folder
            (
                "Open English Translation—Literal Version",
                "OET-LV",
                "../../../../../Data/Work/Matigsalug/Bible/OET-LV/",
            ),
            #("Matigsalug", "MBTV", "../../../../../Data/Work/Matigsalug/Bible/MBTV/",),
            #("ESFM Test 1", "OET-LV", "Tests/DataFilesForTests/ESFMTest1/"),
            #("ESFM Test 2", "OET-RV", "Tests/DataFilesForTests/ESFMTest2/"),
            #("All Markers Project", "WEB+", "Tests/DataFilesForTests/USFMAllMarkersProject/"),
            #("USFM Error Project", "UEP", "Tests/DataFilesForTests/USFMErrorProject/"),
            #("BOS Exported Files", "Exported", "Tests/BOS_USFM_Export/"),
        ):
            count += 1
            if not os.access(testFolder, os.R_OK):
                print(
                    "\nSorry, test folder {!r} is not readable on this computer."
                    .format(testFolder))
                continue

            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("\nESFM A{}/".format(count))
            EsfmB = ESFMBible(testFolder, name, abbreviation)
            EsfmB.load()
            print("Gen assumed book name:",
                  repr(EsfmB.getAssumedBookName('GEN')))
            print("Gen long TOC book name:",
                  repr(EsfmB.getLongTOCName('GEN')))
            print("Gen short TOC book name:",
                  repr(EsfmB.getShortTOCName('GEN')))
            print("Gen book abbreviation:",
                  repr(EsfmB.getBooknameAbbreviation('GEN')))
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(EsfmB)
            if BibleOrgSysGlobals.strictCheckingFlag:
                EsfmB.check()
                #print( EsfmB.books['GEN']._processedLines[0:40] )
                EsfmBErrors = EsfmB.getErrors()  # kept for the debugging print below
                # print( UBErrors )
            if BibleOrgSysGlobals.commandLineOptions.export:
                ##EsfmB.toDrupalBible()
                EsfmB.doAllExports(wantPhotoBible=False,
                                   wantODFs=True,
                                   wantPDFs=True)
                # Read back the pickle that the export is expected to have written
                newObj = BibleOrgSysGlobals.unpickleObject(
                    BibleOrgSysGlobals.makeSafeFilename(abbreviation) + '.pickle',
                    os.path.join("OutputFiles/", "BOS_Bible_Object_Pickle/"))
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print("newObj is", newObj)

    if 0:  # Test a whole folder full of folders of ESFM Bibles
        testBaseFolder = "Tests/DataFilesForTests/theWordRoundtripTestFiles/"

        def findInfo(somepath):
            """
            Find out info about the project from the included copyright.htm file.

            Returns None if somepath contains no copyright.htm file, otherwise
            a (title, nameDict) tuple: title is the text of the last <title>
            line (or None if there is none) and nameDict maps BBB book codes
            to the names taken from the <option value="..."> lines.
            """
            cFilepath = os.path.join(somepath, "copyright.htm")
            if not os.path.exists(cFilepath):
                return None
            title, nameDict = None, {}
            with open(cFilepath, encoding='utf-8') as myFile:  # Automatically closes the file when done
                for lineNumber, line in enumerate(myFile, start=1):
                    if lineNumber == 1 and line.startswith(chr(65279)):  # U+FEFF
                        # The file is decoded as UTF-8, so this is the UTF-8 BOM
                        logging.info(
                            "ESFMBible: Detected UTF-8 Byte Order Marker in copyright.htm file"
                        )
                        line = line[1:]  # Remove the UTF-8 Byte Order Marker
                    # endswith() is safe on an empty string (e.g., a file that
                    #   contained nothing but the BOM), unlike indexing line[-1]
                    if line.endswith('\n'):
                        line = line[:-1]  # Remove one trailing newline character
                    if not line:
                        continue  # Just discard blank lines
                    if line.startswith("<title>"):
                        title = line.replace("<title>", "").replace(
                            "</title>", "").strip()
                    if line.startswith('<option value="'):
                        adjLine = line.replace('<option value="',
                                               '').replace('</option>', '')
                        # Fixed-column slicing: presumably a 3-character book code
                        #   first and the display name from column 11 -- TODO confirm
                        ESFM_BBB, name = adjLine[:3], adjLine[11:]
                        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromESFM(
                            ESFM_BBB)
                        #print( ESFM_BBB, BBB, name )
                        nameDict[BBB] = name
            return title, nameDict
        # end of findInfo

        count = totalBooks = 0
        if os.access(testBaseFolder, os.R_OK):  # check that we can read the test data
            for something in sorted(os.listdir(testBaseFolder)):
                somepath = os.path.join(testBaseFolder, something)
                if os.path.isfile(somepath):
                    print("Ignoring file {!r} in {!r}".format(
                        something, testBaseFolder))
                elif os.path.isdir(somepath):  # Let's assume that it's a folder containing a ESFM (partial) Bible
                    #if not something.startswith( 'ssx' ): continue # This line is used for debugging only specific modules
                    count += 1
                    title = None
                    findInfoResult = findInfo(somepath)
                    if findInfoResult:
                        title = findInfoResult[0]  # the book-name dict isn't needed here
                    if title is None:
                        # Fall back to the folder name (without any "_usfm" suffix)
                        title = something[:-5] if something.endswith(
                            "_usfm") else something
                    name, testFolder = title, somepath
                    if os.access(testFolder, os.R_OK):
                        if BibleOrgSysGlobals.verbosityLevel > 0:
                            print("\nESFM B{}/".format(count))
                        EsfmB = ESFMBible(testFolder, name)
                        EsfmB.load()
                        if BibleOrgSysGlobals.verbosityLevel > 0:
                            print(EsfmB)
                        if BibleOrgSysGlobals.strictCheckingFlag:
                            EsfmB.check()
                            EsfmBErrors = EsfmB.getErrors()  # kept for the debugging print below
                            #print( EsfmBErrors )
                        if BibleOrgSysGlobals.commandLineOptions.export:
                            EsfmB.doAllExports(wantPhotoBible=False,
                                               wantODFs=False,
                                               wantPDFs=False)
                    else:
                        print(
                            "\nSorry, test folder {!r} is not readable on this computer."
                            .format(testFolder))
            if count:
                print("\n{} total ESFM (partial) Bibles processed.".format(
                    count))
            # NOTE: totalBooks is never incremented in this function, so this
            #   summary line is currently never printed.
            if totalBooks:
                print("{} total books ({} average per folder)".format(
                    totalBooks, round(totalBooks / count)))
        else:
            print(
                "\nSorry, test folder {!r} is not readable on this computer.".
                format(testBaseFolder))
def loadParagraph( paragraphXML, paragraphlocation ): """ Load a paragraph from the USX XML. Uses (and updates) c,v information from the containing function. """ nonlocal c, v # Process the attributes first paragraphStyle = None for attrib,value in paragraphXML.items(): if attrib=='style': paragraphStyle = value # This is basically the USFM marker name else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) # Now process the paragraph text (or write a paragraph marker anyway) self.addLine( paragraphStyle, paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else '' ) # Now process the paragraph subelements for element in paragraphXML: location = element.tag + ' ' + paragraphlocation #print( "USXXMLBibleBook.load", c, v, element.tag, location ) if element.tag == 'verse': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoText( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) # Process the attributes first verseStyle = None for attrib,value in element.items(): if attrib=='number': v = value elif attrib=='style': verseStyle = value else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) if verseStyle != 'v': logging.warning( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) ) self.addLine( verseStyle, v + ' ' ) # Now process the tail (if there's one) which is the verse text if element.tail: vText = element.tail.strip() if vText: #print( repr(vText) ) self.appendToLastLine( vText ) elif element.tag == 'char': # Process the attributes first charStyle = None for attrib,value in element.items(): if attrib=='style': charStyle = value # This is basically the USFM character marker name #print( " charStyle", charStyle ) assert( not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle ) ) else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) charLine = "\\{} {} 
".format( charStyle, element.text ) # Now process the subelements -- chars are one of the few multiply embedded fields in USX for subelement in element: sublocation = subelement.tag + ' ' + location #print( c, v, element.tag ) if subelement.tag == 'char': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation ) # Process the attributes first subCharStyle, charClosed = None, True for attrib,value in subelement.items(): if attrib=='style': subCharStyle = value elif attrib=='closed': assert( value=='false' ) charClosed = False else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) charLine += "\\{} {}".format( subCharStyle, subelement.text ) if charClosed: charLine += "\\{}*".format( subCharStyle ) charLine += '' if subelement.tail is None else subelement.tail.strip() else: logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) ) self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) ) # A character field must be added to the previous field charLine += "\\{}*{}".format( charStyle, '' if element.tail is None else element.tail.strip() ) if debuggingThisModule: print( "USX.loadParagraph:", c, v, paragraphStyle, charStyle, repr(charLine) ) self.appendToLastLine( charLine ) elif element.tag == 'note': BibleOrgSysGlobals.checkXMLNoText( element, location ) # Process the attributes first noteStyle = noteCaller = None for attrib,value in element.items(): if attrib=='style': noteStyle = value # This is basically the USFM marker name assert( noteStyle in ('x','f',) ) elif attrib=='caller': noteCaller = value # Usually hyphen or a symbol to be used for the note else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) assert( noteStyle and noteCaller ) # both compulsory noteLine = "\\{} {} ".format( noteStyle, noteCaller ) # Now process the 
subelements -- notes are one of the few multiply embedded fields in USX for subelement in element: sublocation = subelement.tag + ' ' + location #print( c, v, element.tag ) if subelement.tag == 'char': # milestone (not a container) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation ) # Process the attributes first charStyle, charClosed = None, True for attrib,value in subelement.items(): if attrib=='style': charStyle = value elif attrib=='closed': assert( value=='false' ) charClosed = False else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) noteLine += "\\{} {}".format( charStyle, subelement.text ) if charClosed: noteLine += "\\{}*".format( charStyle ) noteLine += '' if subelement.tail is None else subelement.tail.strip() elif subelement.tag == 'unmatched': # Used to denote errors in the source text BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation ) BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation ) # Process the attributes first unmmatchedMarker = None for attrib,value in subelement.items(): if attrib=='marker': unmmatchedMarker = value else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) ) self.addPriorityError( 2, c, v, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) ) else: logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) ) self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) ) if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail #noteLine += "\\{}*".format( charStyle ) noteLine += "\\{}*".format( noteStyle ) if element.tail: noteText = element.tail.strip() noteLine += noteText self.appendToLastLine( noteLine ) elif element.tag == 'link': # Used to include extra resources 
BibleOrgSysGlobals.checkXMLNoText( element, location ) BibleOrgSysGlobals.checkXMLNoTail( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) # Process the attributes first linkStyle = linkDisplay = linkTarget = None for attrib,value in element.items(): if attrib=='style': linkStyle = value assert( linkStyle in ('jmp',) ) elif attrib=='display': linkDisplay = value # e.g., "click here" elif attrib=='target': linkTarget = value # e.g., some reference else: logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) ) self.addPriorityError( 3, c, v, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) ) elif element.tag == 'unmatched': # Used to denote errors in the source text BibleOrgSysGlobals.checkXMLNoText( element, location ) BibleOrgSysGlobals.checkXMLNoTail( element, location ) BibleOrgSysGlobals.checkXMLNoAttributes( element, location ) BibleOrgSysGlobals.checkXMLNoSubelements( element, location ) self.addPriorityError( 2, c, v, _("Unmatched element in {}").format( location) ) else: logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, location ) ) self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) ) for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] ) if BibleOrgSysGlobals.debugFlag: halt
self.assertGreater( len(something), 1 ) result4 = self.UMs.getTypicalNoteSets( 'xr' ) self.assertTrue( isinstance( result4, tuple ) ) self.assertLess( len(result4), len(result1) ) for something in result4: self.assertTrue( isinstance( something , list ) ) self.assertTrue( something ) self.assertGreater( len(something), 1 ) result5 = self.UMs.getTypicalNoteSets( 'pq' ) self.assertEqual( result5, None ) #end of test_2200_getTypicalNoteSets def test_2210_getMarkerListFromText( self ): """ Test the getMarkerListFromText function. """ self.assertEqual( self.UMs.getMarkerListFromText(''), [] ) self.assertEqual( self.UMs.getMarkerListFromText('This is just plain text.'), [] ) self.assertEqual( self.UMs.getMarkerListFromText('This \\bk book\\bk* is good'), \ [('bk',5,' ','\\bk ',['bk'],1,'book'), ('bk',13,'*','\\bk*',[],None,' is good')] ) #end of test_2210_getMarkerListFromText # end of USFMMarkersTests class if __name__ == '__main__': # Configure basic set-up parser = BibleOrgSysGlobals.setup( ProgName, ProgVersion ) BibleOrgSysGlobals.addStandardOptionsAndProcess( parser ) if BibleOrgSysGlobals.verbosityLevel > 1: print( ProgNameVersion ) unittest.main() # Automatically runs all of the above tests # end of USFMMarkersTests.py