Ejemplo n.º 1
0
    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML chapter element
            finding and saving the chapter number and
            finding and saving verse elements.

        Parameters:
            BBB: book code (only used in location strings and log messages here).
            thisBook: book object whose addLine( marker, text ) method receives the extracted lines.
            chapter: XML chapter element; the chapter number is read from its 'n' attribute.

        Lines added to thisBook:
            'c' with the chapter number (if there is one),
            'v' with "verseNumber text" for each verse that has text.
            Verse text containing newlines is treated as poetry: an empty 'q1' line is added,
                then the 'v' line with the first text line, then a 'q1' line for each further text line.

        Returns None. Problems found in the XML are reported through the logging module.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="n":
                chapterNumber = value
            elif attrib=="VERSES":
                pass # Recognised (so no warning is logged) but the value isn't used
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            #print( BBB, 'c', chapterNumber )
            chapterNumber = chapterNumber.replace( 'of Solomon ', '' ) # Fix a mistake in the Chinese_SU module
            thisBook.addLine( 'c', chapterNumber )
        else: logging.error( "Missing 'n' attribute in chapter element for {}".format( BBB ) )

        for element in chapter:
            if element.tag == OpenSongXMLBible.verseTag:
                sublocation = "verse in {} {}".format( BBB, chapterNumber )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'l5ks' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5f7h' )
                verseNumber = toVerseNumber = None
                for attrib,value in element.items():
                    if attrib=="n":
                        verseNumber = value
                    elif attrib=="t":
                        toVerseNumber = value # Read so that it's not reported below, but not used any further
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert( verseNumber )
                #thisBook.addLine( 'v', verseNumber )
                vText = element.text
                if not vText:
                    logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )
                elif '\n' in vText: # This is how they represent poetry
                    #print( "vText", repr(vText), repr(element.text) )
                    for j, textBit in enumerate( vText.split( '\n' ) ):
                        if j==0:
                            thisBook.addLine( 'q1', '' )
                            thisBook.addLine( 'v', verseNumber + ' ' + textBit )
                        else: thisBook.addLine( 'q1', textBit )
                else: # Just one verse line -- the main text of the verse
                    #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
                    thisBook.addLine( 'v', verseNumber + ' ' + vText )
            else: logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.verseTag, element.tag ) )
Ejemplo n.º 2
0
 def loadCharacterFormatting( self, element, location, BBB, C, V ):
     """
     Append a character-formatting element to the last line of self.thisBook.

     The element's tag is used as the marker name: ' \\marker text' is appended first,
         then any 'f' subelements are passed to self.loadFootnote(),
         and finally the closing '\\marker*' is appended (followed by a space and the tail if there is one).
     Any other subelement is reported with a logged warning.

     BBB, C and V are passed on to loadFootnote(); C and V also appear in the warning message.
     """
     openingMarker = element.tag
     innerText = clean( element.text )
     trailingText = clean( element.tail )
     BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'sd12' )

     # Opening marker plus the element's own text
     self.thisBook.appendToLastLine( ' \\{} {}'.format( openingMarker, innerText ) )

     for child in element:
         childLocation = child.tag + " of " + location
         if child.tag != 'f': # Footnotes are the only nested elements that get handled here
             logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(child.tag), self.thisBook.BBB, C, V, location ) )
             if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                 halt # NOTE(review): presumably a deliberately undefined name used to stop execution when debugging -- confirm
             continue
         self.loadFootnote( child, childLocation, BBB, C, V )

     # Closing marker, followed by the tail text (if any) separated by a space
     closingSuffix = (' ' + trailingText) if trailingText else ''
     self.thisBook.appendToLastLine( '\\{}*{}'.format( openingMarker, closingSuffix ) )
Ejemplo n.º 3
0
    def getMaximumPossibleFilenameTuples( self, strictCheck=False ):
        """
        Run each of the filename detection methods and use the one that finds the most files.
            The "confirmed" result is the starting point and is only replaced by the "external"
                and then the "internal" result if that one is strictly longer.

        If strictCheck is set (or the global strictCheckingFlag is set), each file is then peeked into
            and its entry is removed from the list unless the first line (after any BOM) starts with a backslash.

        The chosen list is saved in self.lastTupleList and returned.
            Each entry is a 2-tuple ( BBB, filename ) not including the folder path.
        """
        # Start with the confirmed list and let the other methods try to beat it
        bestLabel, bestTuples = 'Confirmed', self.getConfirmedFilenameTuples()
        for label,finder in ( ('External',self.getPossibleFilenameTuplesExt), ('Internal',self.getPossibleFilenameTuplesInt) ):
            candidateTuples = finder()
            if len(candidateTuples) > len(bestTuples):
                bestLabel, bestTuples = label, candidateTuples
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "getMaximumPossibleFilenameTuples: using {}".format( bestLabel ) )

        if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
            for bookCode,fileName in tuple( bestTuples ): # Loop over a snapshot because entries get removed from the list itself
                peekedLine = BibleOrgSysGlobals.peekIntoFile( fileName, self.givenFolderName )
                if peekedLine is None: # seems we couldn't decode the file
                    acceptable = False
                else:
                    if peekedLine and peekedLine[0]=='\ufeff': # Unicode Byte Order Marker (BOM)
                        logging.info( "USFMBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format( fileName ) )
                        peekedLine = peekedLine[1:] # Drop the BOM before checking the first character
                    # Don't allow a blank first line and it must start with a backslash
                    acceptable = bool( peekedLine ) and peekedLine[0] == '\\'
                if not acceptable:
                    bestTuples.remove( (bookCode,fileName) )

        self.lastTupleList = bestTuples
        return bestTuples # No need to sort these, coz all the above calls produce sorted results
Ejemplo n.º 4
0
    def saveAnyChangedGlosses( self, exportAlso=False ):
        """
        Pickle the glossing dictionary to self.glossingDictFilepath
            but only if self.haveGlossingDictChanges is set.

        Any existing file is backed up first.
        If exportAlso is set, self.exportGlossingDictionary() is called after the save.

        Does nothing (apart from an optional debug print) if there are no changes.
        """
        if debuggingThisModule: print( "saveAnyChangedGlosses()" )

        if not self.haveGlossingDictChanges:
            return # Nothing to save (and so nothing to export either)

        BibleOrgSysGlobals.backupAnyExistingFile( self.glossingDictFilepath, 9 )

        # The more verbose message also includes the filepath
        wantFullMessage = BibleOrgSysGlobals.verbosityLevel > 2 or debuggingThisModule
        if wantFullMessage:
            print( "  Saving Hebrew glossing dictionary ({}->{} entries) to '{}'…".format( self.loadedGlossEntryCount, len(self.glossingDict), self.glossingDictFilepath ) )
        elif BibleOrgSysGlobals.verbosityLevel > 1:
            print( "  Saving Hebrew glossing dictionary ({}->{} entries)…".format( self.loadedGlossEntryCount, len(self.glossingDict) ) )

        with open( self.glossingDictFilepath, 'wb' ) as outputFile:
            pickle.dump( self.glossingDict, outputFile )

        if exportAlso:
            self.exportGlossingDictionary()
0
    def load( self ):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath into self.tree (the root element) and, if the root has the expected tag:
            copies the biblename, status, revision and version attributes (if present and non-empty)
                into self.name, self.status, self.revision and self.version,
            detaches an INFORMATION record (either the first or the last child) into self.header
                and validates it, and
            validates and extracts each remaining book element.
        Unexpected attributes and elements are reported through the logging module.

        self.doPostLoadProcessing() is always called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        self.tree = ElementTree().parse( self.sourceFilepath )
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == HaggaiXMLBible.treeTag:
            location = "Haggai XML file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            name = status = revision = version = None
            for attrib,value in self.tree.items():
                if attrib == "biblename":
                    name = value
                elif attrib == "status":
                    status = value
                elif attrib == "revision":
                    revision = value
                elif attrib == "version":
                    version = value
                elif attrib in ( HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation', "lgid", "type" ):
                    pass # Recognised (so no warning) but not stored -- e.g., in italian.xml lgid is set to "german"
                else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )
            if name: self.name = name
            if status: self.status = status
            if revision: self.revision = revision
            if version: self.version = version

            # Handle the information record which can be at the START or at the END of the file
            if len(self.tree): # Guard against a root element with no children (indexing it would raise IndexError)
                headerIndex = 0 if self.tree[0].tag == 'INFORMATION' else len(self.tree) - 1
                if self.tree[headerIndex].tag == 'INFORMATION':
                    self.header = self.tree[headerIndex]
                    self.tree.remove( self.header ) # So that only book elements should remain
                    self.__validateAndExtractHeader()

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == HaggaiXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) )
        else: logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) )
        self.doPostLoadProcessing()
Ejemplo n.º 6
0
    def __validateAndExtractBook( self, book ):
        """
        Validate a Haggai XML book element and extract its captions and chapters.

        The book code (BBB) is found from the 'bnumber' attribute if there is one,
            otherwise from the 'bname' attribute.
            If no BBB can be determined, nothing is extracted or saved.

        Otherwise a new BibleBook is created, filled from the caption and chapter subelements,
            and passed to self.stashBook().
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Collect the book attributes that we know about
        knownAttributes = { 'bnumber':None, 'bname':None, 'bsname':None }
        for attrib,value in book.items():
            if attrib in knownAttributes:
                knownAttributes[attrib] = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
        bookNumber = knownAttributes['bnumber']
        bookName = knownAttributes['bname']
        bookShortName = knownAttributes['bsname']

        # The book number takes priority over the book name
        BBB = None
        if bookNumber:
            try:
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )

        if not BBB:
            return # Couldn't identify the book so there's nothing to extract

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'Haggai XML Bible Book object'
        thisBook.objectTypeString = 'Haggai'
        #thisBook.sourceFilepath = self.sourceFilepath
        for child in book:
            childTag = child.tag
            if childTag == HaggaiXMLBible.captionTag:
                childLocation = "caption in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoAttributes( child, childLocation, 'jhl6' )
                BibleOrgSysGlobals.checkXMLNoSubelements( child, childLocation, 'jk21' )
                BibleOrgSysGlobals.checkXMLNoTail( child, childLocation, 'kjh6' )
                thisBook.addLine( 'mt', child.text )
            elif childTag == HaggaiXMLBible.chapterTag:
                childLocation = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( child, childLocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( child, childLocation, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, child )
            else:
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, child.tag ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
Ejemplo n.º 7
0
    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML chapter element
            finding and saving the chapter number and
            finding and processing paragraph elements.

        Parameters:
            BBB: book code (used in location strings and log messages and passed on to the helpers).
            thisBook: book object whose addLine( marker, text ) method receives the extracted lines.
            chapter: XML chapter element; the chapter number is read from its 'cnumber' attribute.

        A 'c' line is added for the chapter number (if there is one).
            Paragraph subelements are passed to __validateAndExtractParagraph().
        The verse and caption branches only match the tag names with 'disabled' appended,
            so an ordinary verse or caption element placed directly in a chapter is logged as an error.

        Returns None. Problems found in the XML are reported through the logging module.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter…") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="cnumber":
                chapterNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            #print( BBB, 'c', chapterNumber )
            thisBook.addLine( 'c', chapterNumber )
        else: logging.error( "Missing 'cnumber' attribute in chapter element for {}".format( BBB ) )

        for element in chapter:
            if element.tag == HaggaiXMLBible.paragraphTag:
                self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element )
            elif element.tag == HaggaiXMLBible.verseTag+'disabled':
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element )
            elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms
                location = "caption in {} {}".format( BBB, chapterNumber )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' )
                # Handle caption attributes
                vRef = None
                for attrib,value in element.items():
                    if attrib=="vref":
                        vRef = value
                        if BibleOrgSysGlobals.debugFlag: assert vRef == '1'
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert vRef
                vText = element.text
                if vText: # This is the main text of the caption
                    #print( "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
                    thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero
                else:
                    logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
            else: # Report the tag that is actually accepted directly inside a chapter
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.paragraphTag, element.tag ) )
Ejemplo n.º 8
0
    def save( self ):
        """
        Write the program settings in self.data out to self.settingsFilepath.
            (The settings must already have been put into self.data.)

        Any existing settings file is backed up first.
        The file starts with two comment lines (a title line and a timestamp line)
            followed by a blank line and then whatever self.data.write() produces.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("ApplicationSettings.save() in {!r}").format( self.settingsFilepath ) )
            assert self.data
            assert self.settingsFilepath

        BibleOrgSysGlobals.backupAnyExistingFile( self.settingsFilepath, numBackups=8 )
        # The file may or may not have previously existed
        with open( self.settingsFilepath, 'wt', encoding='utf-8' ) as outputFile:
            # Put a (comment) heading in the file first
            titleText = _("{} {} settings file").format( APP_NAME, SettingsVersion )
            outputFile.write( '# ' + titleText + '\n' )

            timestampString = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            savedText = _("Originally saved {} as {}").format( timestampString, self.settingsFilepath )
            outputFile.write( '# ' + savedText + '\n\n' )

            # Then the settings themselves
            self.data.write( outputFile )
Ejemplo n.º 9
0
    def __validateAndExtractBook( self, book, bookNumber ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        Parameters:
            book: XML book element; the book name is read from its 'n' attribute.
            bookNumber: reference number of the book, used to cross-check the book code.

        The book code (BBB) is first looked up from the book name
            (trying again with any accents removed if that fails).
            It is then compared with the BBB found from bookNumber,
            and the one from bookNumber is used if they differ.

        If there is a BBB, a new BibleBook is created, filled from the chapter subelements,
            and passed to self.stashBook().
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Process the book attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        if bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None and bookName: # Don't try to remove accents from a missing book name
            adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText( adjustedBookName )
        BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        if BBB2 != BBB: # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
            BBB = BBB2 # The book number wins over the book name
            #print( BBB ); halt

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
            self.stashBook( thisBook )
Ejemplo n.º 10
0
    def __validateAndExtractBook( self, book ):
        """
        Validate an OpenSong XML book element and extract its chapters.

        The book name comes from the 'n' attribute.
            The book code (BBB) is looked up with self.genericBOS first,
            and then with the BibleBooksNames data (loaded into the module global on first use).
        An error is logged (and nothing is saved) if there's no book name or it isn't recognized.

        Otherwise a new BibleBook is created with 'id', 'h' and 'mt1' lines,
            filled from the chapter subelements, and passed to self.stashBook().
        """
        global BibleBooksNames

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book…") )

        # Process the book attributes first
        bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        if not bookName: # no bookName
            logging.error( _("OpenSong load can't find a book name") )
            return

        BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
        if not BBB: # wasn't English
            if BibleBooksNames is None: # Only load the names data the first time that it's needed
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            BBB = BibleBooksNames.getBBBFromText( bookName ) # Try non-English booknames

        if not BBB: # no BBB
            logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
            return

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'OpenSong XML Bible Book object'
        thisBook.objectTypeString = 'OpenSong'
        #thisBook.sourceFilepath = self.sourceFilepath

        # Start the book with the header lines
        USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        idText = '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion )
        thisBook.addLine( 'id', idText )
        for headerMarker in ( 'h', 'mt1' ):
            thisBook.addLine( headerMarker, bookName )

        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, child.tag ) )
                continue
            childLocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( child, childLocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( child, childLocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )

        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
Ejemplo n.º 11
0
 def loadFigure( self, element, location ):
     """
     Append a figure element to the last line of self.thisBook.

     Each subelement supplies one figure field (its tag must be one of the known field names).
     The field values are joined with '|' in a fixed order
         (description, catalog, size, location, copyright, caption, reference)
         and appended as ' \\fig ...\\fig*' (followed by a space and the element's tail if there is one).
     Fields without a subelement (or without text) are left empty.
     """
     BibleOrgSysGlobals.checkXMLNoText( element, location, 'ff36' )
     BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'cf35' )

     # The order here is the order that the fields are written out in
     fieldOrder = ( 'description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference', )
     fieldValues = dict.fromkeys( fieldOrder, '' )
     for child in element:
         childLocation = child.tag + " of " + location
         fieldName = child.tag
         fieldText = clean( child.text )
         assert( fieldName in fieldValues )
         fieldValues[fieldName] = fieldText if fieldText is not None else ''
         BibleOrgSysGlobals.checkXMLNoTail( child, childLocation, 'jkf5' )
         BibleOrgSysGlobals.checkXMLNoAttributes( child, childLocation, 'ld18' )
         BibleOrgSysGlobals.checkXMLNoSubelements( child, childLocation, 'hb46' )

     joinedFields = '|'.join( fieldValues[fieldName] for fieldName in fieldOrder )
     trailingText = clean( element.tail )
     suffix = (' ' + trailingText) if trailingText else ''
     self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( joinedFields, suffix ) )
Ejemplo n.º 12
0
def segmentizeLine( line, segmentEndPunctuation='.?!;:' ):
    """
    Break the line into segments (like sentences that should match across the translations)
        and then break each segment into words.

    If you want case folding, convert line to lowerCase before calling.

    Set segmentEndPunctuation to None if you don't want the lines further divided.

    Em-dashes and en-dashes are treated as word breaks.
    For each word, the internal SFM markers are removed and the word punctuation is stripped.
        If the word (of more than one character) still starts with a non-alphanumeric character,
            that one character is dropped.
        Words consisting only of digits and the characters ':-,.' (i.e., references or numbers) are left out.

    Returns a list of lists of words
        (one inner list per segment -- it's empty if the segment had no words to keep).
    """
    inDebugMode = BibleOrgSysGlobals.debugFlag and debuggingThisModule
    if inDebugMode:
        print( exp("segmentizeLine( {!r} )").format( line ) )

    segmentSeparator = 'SsSsSsS' # Placeholder that gets substituted for every segment-end character
    if segmentEndPunctuation:
        for segmentEndChar in segmentEndPunctuation:
            line = line.replace( segmentEndChar, segmentSeparator )
    line = line.replace('—',' ').replace('–',' ') # Treat em-dash and en-dash as word break characters

    lineList = []
    for segment in line.split( segmentSeparator ):
        segmentList = []
        for rawWord in segment.split():
            word = rawWord
            for internalMarker in BibleOrgSysGlobals.internal_SFMs_to_remove: word = word.replace( internalMarker, '' )
            word = BibleOrgSysGlobals.stripWordPunctuation( word )
            if len(word) > 1 and not word[0].isalnum():
                # NOTE: This is a plain function, so no book/chapter/verse is available for these messages
                if inDebugMode:
                    print( "segmentizeLine: " + _("Have unexpected character starting word {!r}").format( word ) )
                word = word[1:]
            if not word: continue # Nothing left after all that stripping

            if inDebugMode: # Report any unexpected characters (this doesn't change the result)
                for k,char in enumerate(word):
                    if not char.isalnum() and (k==0 or k==len(word)-1 or char not in BibleOrgSysGlobals.MEDIAL_WORD_PUNCT_CHARS):
                        print( "segmentizeLine: " + _("Have unexpected {!r} in word {!r}").format( char, word ) )

            isAReferenceOrNumber = all( char.isdigit() or char in ':-,.' for char in word )
            if not isAReferenceOrNumber:
                segmentList.append( word )
        lineList.append( segmentList )

    return lineList
Ejemplo n.º 13
0
    def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ):
        """
        Validate a Haggai XML paragraph element and extract its verses.

        A 'p' line is added to thisBook for the paragraph itself,
            then each verse subelement is passed to __validateAndExtractVerse().
        The caption branch only matches the caption tag name with 'disabled' appended;
            such a caption is saved as verse zero.
        Any other subelement is logged as an error.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph...") )

        # The paragraph element itself should be a bare container
        paragraphLocation = "paragraph in {} {}".format( BBB, chapterNumber )
        for checkFunction in ( BibleOrgSysGlobals.checkXMLNoAttributes, BibleOrgSysGlobals.checkXMLNoText, BibleOrgSysGlobals.checkXMLNoTail ):
            checkFunction( paragraph, paragraphLocation, 'brgw3' )
        thisBook.addLine( 'p', '' )

        # Handle the subelements (verses)
        for child in paragraph:
            if child.tag == HaggaiXMLBible.verseTag:
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, child )
                continue

            if child.tag != HaggaiXMLBible.captionTag+'disabled':
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, child.tag ) )
                continue

            # Must be a caption (used in Psalms)
            captionLocation = "caption in {} {}".format( BBB, chapterNumber )
            BibleOrgSysGlobals.checkXMLNoTail( child, captionLocation, 'k5k8' )
            BibleOrgSysGlobals.checkXMLNoSubelements( child, captionLocation, 'd3f5' )
            # Handle caption attributes
            vRef = None
            for attrib,value in child.items():
                if attrib != "vref":
                    logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                    continue
                vRef = value
                if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' )
            if BibleOrgSysGlobals.debugFlag: assert( vRef )

            captionText = child.text
            if captionText: # This is the main text of the caption
                thisBook.addLine( 'v', '0' + ' ' + captionText ) # We save it as verse zero
            else:
                logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
Ejemplo n.º 14
0
    def exportGlossingDictionary( self, glossingDictExportFilepath=None ):
        """
        Export the glossing dictionary to a text file
            plus a reversed text file (without the references).

        Parameters:
            glossingDictExportFilepath: where to write the main export
                (DEFAULT_GLOSSING_EXPORT_FILEPATH is used if it's None).
                The reversed export always goes to DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH
                    and is only written if the dictionary isn't empty.

        Any existing export files are backed up first.
        The main export has one line per word: references list, specific references dict, generic gloss, word.
        The reversed export has one "gloss  word" line per word, sorted by the lower-cased generic gloss.

        Also does a few checks while exporting (the problems are logged but the entries are still written).
            (These can be fixed and then the file can be imported.)
        """
        #print( "exportGlossingDictionary()" )
        if glossingDictExportFilepath is None: glossingDictExportFilepath = DEFAULT_GLOSSING_EXPORT_FILEPATH
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("Exporting glossing dictionary ({} entries) to '{}'…").format( len(self.glossingDict), glossingDictExportFilepath ) )

        BibleOrgSysGlobals.backupAnyExistingFile( glossingDictExportFilepath, 5 )
        # NOTE(review): No encoding is given so the platform default is used -- confirm that it matches what the importer expects
        with open( glossingDictExportFilepath, 'wt' ) as exportFile:
            for word,(genericGloss,genericReferencesList,specificReferencesDict) in self.glossingDict.items():
                if ' ' in word or '/' in word:
                    logging.error( _("Word {!r} has illegal characters").format( word ) )
                if ' ' in genericGloss:
                    logging.error( _("Generic gloss {!r} for {!r} has illegal characters").format( genericGloss, word ) )
                if word.count('=') != genericGloss.count('='): # '=' counts are compared as the morpheme counts
                    logging.error( _("Generic gloss {!r} and word {!r} has different numbers of morphemes").format( genericGloss, word ) )
                if not genericReferencesList:
                    logging.error( _("Generic gloss {!r} for {!r} has no references").format( genericGloss, word ) )
                exportFile.write( '{}  {}  {}  {}\n'.format( genericReferencesList, specificReferencesDict, genericGloss, word ) ) # Works best in editors with English on the left, Hebrew on the right

        if self.glossingDict:
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( _("Exporting reverse glossing dictionary ({} entries) to '{}'…").format( len(self.glossingDict), DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH ) )
            BibleOrgSysGlobals.backupAnyExistingFile( DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH, 5 )
            doneGlosses = set() # A set gives constant-time duplicate checks (a list would rescan every earlier gloss)
            with open( DEFAULT_GENERIC_GLOSSING_REVERSE_EXPORT_FILEPATH, 'wt' ) as exportFile:
                # NB: Don't use _ as a throw-away name in here because it's the translation function
                for word,(genericGloss,genericReferencesList,specificReferencesDict) in sorted( self.glossingDict.items(), key=lambda theTuple: theTuple[1][0].lower() ):
                    if genericGloss in doneGlosses:
                        logging.warning( _("Generic gloss {!r} has already appeared: currently for word {!r}").format( genericGloss, word ) )
                    exportFile.write( '{}  {}\n'.format( genericGloss, word ) ) # Works best in editors with English on the left, Hebrew on the right
                    doneGlosses.add( genericGloss )
Ejemplo n.º 15
0
 def loadCrossreference( self, element, location ):
     """
     Append a cross-reference element to the last line of self.thisBook.

     Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

     The output starts with ' \\x caller', then has a piece for each subelement
         (which is expected to be a ref, xo, or xt element),
         and ends with '\\x*' (followed by a space and the element's tail if there is one).

     location is a description of where the element is (used in warnings and passed to the XML checks).
     """
     # NB: text is only used in the commented-out debug print below
     text, tail = clean(element.text), clean(element.tail)
     caller = None
     for attrib,value in element.items():
         if attrib == 'caller':
             caller = value
         else:
             logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
     # NB: caller is still None here if there was no caller attribute (and so gets formatted as 'None')
     self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
     for subelement in element:
         sublocation = subelement.tag + " of " + location
         marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
         #print( "USFX.loadCrossreference", repr(caller), repr(text), repr(tail), repr(marker), repr(xText), repr(xTail) )
         #if BibleOrgSysGlobals.verbosityLevel > 0 and marker not in ('ref','xo','xt',):
             #print( "USFX.loadCrossreference found", repr(caller), repr(marker), repr(xText), repr(xTail) )
         if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','xo','xt',) )
         if marker=='ref':
             # A ref directly inside the x element: written as ' \ref target\ref*text'
             assert( xText )
             BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
             target = None
             for attrib,value in subelement.items():
                 if attrib == 'tgt': target = value
                 else:
                     logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
             if target:
                 self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
             # NOTE(review): halt is presumably a deliberately undefined name used to stop execution -- confirm
             else: halt
         else:
             # Any other marker: written as ' \marker text' followed by the pieces from its own subelements
             BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
             self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
             if marker[0] == 'x': # Starts with x, e.g., xo, xt
                 for sub2element in subelement:
                     sub2location = sub2element.tag + " of " + sublocation
                     marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                     BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                     if marker2=='ref':
                         # A nested ref: its text is appended first and then ' \ref target'
                         if xText2:
                             #print( 'xt2', marker2, repr(xText2), repr(xTail2), sub2location )
                             self.thisBook.appendToLastLine( xText2 )
                         target = None
                         for attrib,value in sub2element.items():
                             if attrib == 'tgt': target = value
                             else:
                                 logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                         if target: self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                         else: halt
                     else: halt
                     if xTail2: self.thisBook.appendToLastLine( xTail2 )
             else: halt
         # NOTE(review): the closing '\marker*' is only written here if the subelement has a tail -- confirm that's intended
         if xTail:
             self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
     # Close the cross-reference itself
     self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
Ejemplo n.º 16
0
    def validateEntries( self, segment ):
        """
        Validate the "entries" container element of a Strongs lexicon.

        The container should have no text, tail, or attributes.
        self.StrongsEntries is reset to an empty dictionary
            and then each "entry" subelement is passed to self.validateEntry().
            (Any other subelements are silently skipped.)
        """
        if BibleOrgSysGlobals.debugFlag:
            assert segment.tag == "entries"

        # The container itself should be bare
        BibleOrgSysGlobals.checkXMLNoText( segment, segment.tag, "kw99" )
        BibleOrgSysGlobals.checkXMLNoTail( segment, segment.tag, "ls90" )
        BibleOrgSysGlobals.checkXMLNoAttributes( segment, segment.tag, "hsj2" )

        self.StrongsEntries = {}
        for child in segment:
            if child.tag != "entry":
                continue
            self.validateEntry( child )
Ejemplo n.º 17
0
def testMySwB( indexString, MySwBfolder, MySwBfilename ):
    """
    Crude demonstration of the MySword Bible class using a single module.

    indexString is only used to label the opening progress message.
    MySwBfolder and MySwBfilename are handed straight to the MySwordBible constructor.

    The module is preloaded and summarised, then a fixed set of sample verses
        is looked up and printed (or reported as not found if a KeyError occurs).
    All printing is governed by BibleOrgSysGlobals.verbosityLevel.
    Nothing is returned.
    """
    import VerseReferences

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( _("Demonstrating the MySword Bible class {}…").format( indexString) )
    if BibleOrgSysGlobals.verbosityLevel > 0:
        print( "  Test folder is {!r} {!r}".format( MySwBfolder, MySwBfilename ) )

    MySwB = MySwordBible( MySwBfolder, MySwBfilename )
    MySwB.preload()
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( MySwB ) # Just print a summary
    if MySwB is None: return # Nothing that we can demonstrate

    if BibleOrgSysGlobals.strictCheckingFlag: MySwB.check()

    # Each sample is (testament, book code, chapter, verse)
    sampleReferences = (
        ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'),
        ('OT','DAN','1','21'),
        ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'),
        ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1'),
        )
    for reference in sampleReferences:
        testament, bookCode, chapter, verse = reference
        bookCount = len( MySwB )
        if testament == 'OT':
            if bookCount == 27: continue # Don't bother with OT references if it's only a NT
        elif testament == 'NT':
            if bookCount == 39: continue # Don't bother with NT references if it's only a OT
        elif testament == 'DC':
            if bookCount <= 66: continue # Don't bother with DC references if it's too small
        verseKey = VerseReferences.SimpleVerseKey( bookCode, chapter, verse )
        try:
            shortText = verseKey.getShortText()
            verseText = MySwB.getVerseText( verseKey )
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( reference, shortText, verseText )
        except KeyError:
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( reference, "not found!!!" )

    # The export round trip below is switched off by the constant 0
    if 0:
        MySwB.toMySword()
        if BibleOrgSysGlobals.strictCheckingFlag: # Now compare the original and the re-exported files
            outputFolder = "OutputFiles/BOS_MySword_Reexport/"
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( "\nComparing original and re-exported MySword files…" )
            result = BibleOrgSysGlobals.fileCompare( MySwBfilename, MySwBfilename, MySwBfolder, outputFolder )
            if BibleOrgSysGlobals.debugFlag and not result: halt
    def _validate( self ):
        """
        Check/validate the loaded data.

        Every child of self._XMLtree is expected to be a self._mainElementTag
            element with no text, no tail and no subelements.
        For each such record this checks:
            the compulsory attributes (missing is an error, blank is a warning),
            the optional attributes (blank is a warning),
            any additional unexpected attributes (warning), and
            that the values of the unique attributes are not repeated (error)
                -- only within that same attribute, not across attributes.
        Children with any other tag are reported with a warning.

        All problems are reported through the logging module.
        Nothing is returned.
        """
        # Explicit None/length test because truth-testing an XML element is deprecated
        assert self._XMLtree is not None and len( self._XMLtree )

        # Values seen so far for each must-be-unique attribute (sets make the repeat test cheap)
        seenAttributeValues = { attributeName: set() for attributeName in self._uniqueAttributes }

        for j,element in enumerate(self._XMLtree):
            if element.tag != self._mainElementTag:
                logging.warning( "Unexpected element: {} in record {}".format( element.tag, j ) )
                continue

            BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None:
                    logging.error( "Compulsory {!r} attribute is missing from {} element in record {}".format( attributeName, element.tag, j ) )
                elif not attributeValue and attributeName!="type": # A "type" attribute is allowed to be blank
                    # NOTE: elif so that a missing attribute isn't also (wrongly) reported as blank
                    logging.warning( "Compulsory {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None and not attributeValue:
                    logging.warning( "Optional {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

            # Check for unexpected additional attributes on this main element
            for attributeName,attributeValue in element.items():
                if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                    logging.warning( "Additional {!r} attribute ({!r}) found on {} element in record {}".format( attributeName, attributeValue, element.tag, j ) )

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None or attributeName=="reference_name":
                    continue # "reference_name" values are never checked for repeats
                alreadySeen = seenAttributeValues[attributeName]
                if attributeValue in alreadySeen:
                    logging.error( "Found {!r} data repeated in {!r} field on {} element in record {}".format( attributeValue, attributeName, element.tag, j ) )
                alreadySeen.add( attributeValue )
Ejemplo n.º 19
0
    def load( self ):
        """
        Parse the source XML file and process each of the book elements in it.

        The result of parsing self.sourceFilepath is saved in self.tree.
        If the top-level element has the expected OpenSongXMLBible.treeTag,
            its attributes are inspected (anything other than "n" and "sn"
            gives a warning) and each book element is checked and then passed
            to self.__validateAndExtractBook(). 'OT' and 'NT' elements are
            accepted but ignored, and any other element is logged as an error.
        If the top-level element is something else, an error is logged instead.

        self.doPostLoadProcessing() is always called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("Loading {}...").format( self.sourceFilepath ) )
        self.tree = ElementTree().parse( self.sourceFilepath )
        if BibleOrgSysGlobals.debugFlag:
            assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        if self.tree.tag != OpenSongXMLBible.treeTag: # Not the main (bible) container
            logging.error( "Expected to load {!r} but got {!r}".format( OpenSongXMLBible.treeTag, self.tree.tag ) )
        else:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            # The "n" and "sn" attributes are recognised (but their values aren't used here)
            for attrib,value in self.tree.items():
                if attrib not in ( "n", "sn" ):
                    logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )

            # Find the submain (book) containers
            bookLocation = "book in " + location
            for element in self.tree:
                elementTag = element.tag
                if elementTag == OpenSongXMLBible.bookTag:
                    BibleOrgSysGlobals.checkXMLNoText( element, bookLocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, bookLocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                elif elementTag not in ( 'OT', 'NT' ): # These two are accepted but skipped
                    logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.bookTag, elementTag ) )

        self.doPostLoadProcessing()
Ejemplo n.º 20
0
def testeSwB( eSwBfolder, eSwBfilename ):
    """
    Crudely demonstrate the e-Sword Bible class.

    eSwBfolder and eSwBfilename are handed straight to the ESwordBible constructor.

    The module is loaded and (depending on the global verbosity level) a
        summary of it is printed.
    The sample verse look-ups and the export round trip which follow
        are currently switched off by a constant 0 in the condition.
    Nothing is returned.
    """
    import VerseReferences

    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( _("Demonstrating the e-Sword Bible class...") )
    if BibleOrgSysGlobals.verbosityLevel > 0:
        print( "  Test folder is {} {}".format( repr(eSwBfolder), repr(eSwBfilename) ) )

    eSwB = ESwordBible( eSwBfolder, eSwBfilename )
    eSwB.load() # Load and process the file
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( eSwB ) # Just print a summary

    if 0 and eSwB: # The leading 0 disables everything below
        if BibleOrgSysGlobals.strictCheckingFlag: eSwB.check()

        # Each sample is (testament, book code, chapter, verse)
        sampleReferences = (
            ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'),
            ('OT','DAN','1','21'),
            ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'),
            ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1'),
            )
        for reference in sampleReferences:
            testament, bookCode, chapter, verse = reference
            bookCount = len( eSwB )
            if testament == 'OT':
                if bookCount == 27: continue # Don't bother with OT references if it's only a NT
            elif testament == 'NT':
                if bookCount == 39: continue # Don't bother with NT references if it's only a OT
            elif testament == 'DC':
                if bookCount <= 66: continue # Don't bother with DC references if it's too small
            verseKey = VerseReferences.SimpleVerseKey( bookCode, chapter, verse )
            shortText = verseKey.getShortText()
            verseText = eSwB.getVerseText( verseKey )
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( reference, shortText, verseText )

        # Now export the Bible and compare the round trip
        eSwB.toESword()
        doaResults = eSwB.doAllExports( wantPhotoBible=False, wantODFs=False, wantPDFs=False )
        if BibleOrgSysGlobals.strictCheckingFlag: # Now compare the original and the re-exported files
            outputFolder = "OutputFiles/BOS_e-Sword_Reexport/"
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( "\nComparing original and re-exported e-Sword files..." )
            result = BibleOrgSysGlobals.fileCompare( eSwBfilename, eSwBfilename, eSwBfolder, outputFolder )
            if BibleOrgSysGlobals.debugFlag and not result: halt
Ejemplo n.º 21
0
    #print( "geometry", geometryMap )
    #for something in geometryMap:
    #print( repr(something) )

    settings = ApplicationSettings('BiblelatorData/', 'BiblelatorSettings/',
                                   ProgName)
    settings.load()
    print(str(settings))
    print(repr(settings))

    #tkRootWindow.destroy() #  Useful if we want to measure the start-up time

    # Start the program running
    tkRootWindow.mainloop()


# end of ApplicationSettings.demo

# Script entry point: only runs when this file is executed directly (not when imported)
if __name__ == '__main__':
    # freeze_support() is called first thing inside the __main__ guard, before any other set-up
    from multiprocessing import freeze_support
    freeze_support()  # Multiprocessing support for frozen Windows executables

    # Configure basic set-up
    # (setup() returns the parser object which is then handed on for the
    #   standard options to be added and processed)
    parser = BibleOrgSysGlobals.setup(ProgName, ProgVersion)
    BibleOrgSysGlobals.addStandardOptionsAndProcess(parser)

    # Run the demonstration
    demo()

    # Matching call for the setup() above -- presumably tidies up/reports at exit (TODO confirm)
    BibleOrgSysGlobals.closedown(ProgName, ProgVersion)
# end of Settings.py
Ejemplo n.º 22
0
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print(ProgNameVersion)

    if BibleOrgSysGlobals.commandLineOptions.export:
        brlc = BibleReferencesLinksConverter().loadAndValidate(
        )  # Load the XML
        brlc.exportDataWithIndex()  # Produce a data file and an index file
        brlc.pickle()  # Produce a pickle output file
        brlc.exportDataToJSON()  # Produce a json output file
        brlc.exportDataToPython()  # Produce the .py tables
        brlc.exportDataToC()  # Produce the .h and .c tables

    else:  # Must be demo mode
        # Demo the converter object
        brlc = BibleReferencesLinksConverter().loadAndValidate(
        )  # Load the XML
        print(brlc)  # Just print a summary


# end of demo

# Script entry point: only runs when this file is executed directly (not when imported)
if __name__ == '__main__':
    # Configure basic set-up
    # (setup() returns the parser object which is then handed on for the
    #   standard options to be added and processed)
    # NOTE(review): exportAvailable=True presumably enables the export option
    #   which demo() reads from the command line options -- confirm
    parser = BibleOrgSysGlobals.setup(ProgName, ProgVersion)
    BibleOrgSysGlobals.addStandardOptionsAndProcess(parser,
                                                    exportAvailable=True)

    # Run the demonstration (or the export)
    demo()

    # Matching call for the setup() above -- presumably tidies up/reports at exit (TODO confirm)
    BibleOrgSysGlobals.closedown(ProgName, ProgVersion)
# end of BibleReferencesLinksConverter.py
Ejemplo n.º 23
0
    def __validate(self):
        """
        Check/validate the loaded data.

        Every child of self._XMLtree is expected to be a self._mainElementTag
        element. For each such record this checks:
            * stray text and tail text (plus attributes and/or subelements
              when the class declares none of them at all),
            * the compulsory, optional and unexpected attributes,
            * the compulsory, optional and unexpected subelements,
            * that the unique attributes and unique elements are not repeated
              (only within that same field, not across different fields).
        The text of the "sourceComponent" subelement is used as the record ID
        in the messages (None if the record has no such subelement).

        Problems are reported through the logging module: errors for missing
        or repeated data and for unexpected tail text, warnings for blank or
        unexpected data. Nothing is returned.

        Asserts that a non-empty XML tree has been loaded.
        """
        # Explicit None/length test because truth-testing an XML element is deprecated
        assert self._XMLtree is not None and len(self._XMLtree)

        # Values seen so far for each must-be-unique field (sets make the repeat test cheap)
        seenElementTexts = {
            elementName: set()
            for elementName in self._uniqueElements
        }
        seenAttributeValues = {
            attributeName: set()
            for attributeName in self._uniqueAttributes
        }

        for j, element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    elif not attributeValue:
                        # NOTE: elif so that a missing attribute isn't also (wrongly) reported as blank
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None and not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName, attributeValue in element.items():
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        alreadySeen = seenAttributeValues[attributeName]
                        if attributeValue in alreadySeen:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        alreadySeen.add(attributeValue)

                # Get the sourceComponent to use as a record ID
                # (a record without one must not abort the validation --
                #   the checks below can still report what is missing)
                sourceComponent = element.find("sourceComponent")
                ID = None if sourceComponent is None else sourceComponent.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))
                    else:
                        sublocation = foundElement.tag + " in " + element.tag
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement, sublocation)
                        # NOTE: Compulsory elements are not checked for subelements here
                        if not foundElement.text:
                            logging.warning(
                                _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        sublocation = foundElement.tag + " in " + element.tag
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoSubelements(
                            foundElement, sublocation)
                        if not foundElement.text:
                            logging.warning(
                                _("Optional {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text, ID, j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        text = foundElement.text
                        alreadySeen = seenElementTexts[elementName]
                        if text in alreadySeen:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, ID, j))
                        alreadySeen.add(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))

            # Whatever the element was, nothing except whitespace should follow it
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))

        # Nothing except whitespace should follow the top-level element either
        if self._XMLtree.tail is not None and self._XMLtree.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element").format(
                    self._XMLtree.tail, self._XMLtree.tag))
Ejemplo n.º 24
0
def UnboundBibleFileCheck(givenFolderName,
                          strictCheck=True,
                          autoLoad=False,
                          autoLoadBooks=False):
    """
    Given a folder, search for Unbound Bible files or folders in the folder and in the next level down.

    An Unbound Bible file is one whose name ends with "_utf8.txt". If
    strictCheck (or the global strict checking flag) is set, its first line
    must also be the Unbound Bible header line.

    Returns False if an error is found (the given folder is unreadable or
    is not a folder).

    if autoLoad is false (default)
        returns None, or the number of Bibles found.

    if autoLoad (or autoLoadBooks) is true and exactly one Unbound Bible is found,
        returns the UnboundBible object
        (with its load() function already called if autoLoadBooks is true).

    The given folder itself is searched first, and its subfolders are only
    searched if nothing was found there.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("UnboundBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("UnboundBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("UnboundBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" UnboundBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = _listUnboundCandidates(givenFolderName)

    # See if there's an UnboundBible project here in this given folder
    looksHopeful = any(thisFilename in ('book_names.txt', 'Readme.txt')
                       for thisFilename in foundFiles)
    foundFilenames = [
        thisFilename for thisFilename in sorted(foundFiles)
        if _isUnboundBibleFile(thisFilename, givenFolderName, strictCheck)
    ]
    if foundFilenames:
        numFound = len(foundFilenames)
        lastFilenameFound = foundFilenames[-1]
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = UnboundBible(
                givenFolderName, lastFilenameFound[:-9]
            )  # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print("    Looked hopeful but no actual files found")

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("UnboundBibleFileCheck: {!r} subfolder is unreadable").
                format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    UnboundBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        # We don't go any deeper, so the sub-subfolders are just discarded
        _foundSubfolders, foundSubfiles = _listUnboundCandidates(
            tryFolderName)

        # See if there's an UB project here in this folder
        foundProjects.extend(
            (tryFolderName, thisFilename)
            for thisFilename in sorted(foundSubfiles)
            if _isUnboundBibleFile(thisFilename, tryFolderName, strictCheck))
    if foundProjects:
        numFound = len(foundProjects)
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck foundProjects", numFound,
                  foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            projectFolderName, projectFilename = foundProjects[0]
            uB = UnboundBible(
                projectFolderName, projectFilename[:-9]
            )  # Remove the end of the actual filename "_utf8.txt"
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
    return None  # Nothing was found at either level


def _listUnboundCandidates(folderName):
    """
    List the subfolders and the candidate files directly inside folderName.

    Returns a 2-tuple (folderNames, filenames) of lists of bare names.
    Folders listed in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS are left
    out, as are files whose UPPERCASED name ends with one of the
    filenameEndingsToIgnore or whose UPPERCASED extension (without the dot)
    is in extensionsToIgnore.
    """
    folderNames, filenames = [], []
    for something in os.listdir(folderName):
        somepath = os.path.join(folderName, something)
        if os.path.isdir(somepath):
            if something not in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                folderNames.append(something)
        elif os.path.isfile(somepath):
            somethingUpper = something.upper()
            if any(
                    somethingUpper.endswith(ending)
                    for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[
                    1:] not in extensionsToIgnore:  # Compare without the first dot
                filenames.append(something)
    return folderNames, filenames


def _isUnboundBibleFile(thisFilename, folderName, strictCheck):
    """
    Return True if thisFilename (inside folderName) looks like an Unbound Bible file.

    The name must end with "_utf8.txt". If strictCheck or the global strict
    checking flag is set, the first line of the file must also be readable
    and match the Unbound Bible header line.
    """
    if not thisFilename.endswith('_utf8.txt'):
        return False
    if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
        firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, folderName)
        if firstLine is None:
            return False  # seems we couldn't decode the file
        if firstLine != "#THE UNBOUND BIBLE (www.unboundbible.org)":
            if BibleOrgSysGlobals.verbosityLevel > 3:
                print("UnB (unexpected) first line was {!r} in {}".format(
                    firstLine, thisFilename))
            return False
    return True
Ejemplo n.º 25
0
    def __validateAndExtractVerse(self, BBB, chapterNumber, thisBook, verse):
        """
        Check/validate and extract verse data from the given XML book record
            finding and saving verse elements.
        """

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = toVerseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag: assert verseNumber
        location = "{}:{}".format(
            location, verseNumber)  # Get a better location description
        #thisBook.addLine( 'v', verseNumber )
        vText = '' if verse.text is None else verse.text
        if vText: vText = vText.strip()
        #if not vText: # This happens if a verse starts immediately with a style or note
        #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

        ## Handle verse subelements (notes and styled portions)
        #for subelement in verse:
        #if subelement.tag == VerseViewXMLBible.noteTag:
        #sublocation = "note in " + location
        #noteType = None
        #for attrib,value in subelement.items():
        #if attrib=="type": noteType = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if noteType and noteType not in ('variant',):
        #logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
        #nText, nTail = subelement.text, subelement.tail
        ##print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
        #vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText )
        #if nTail:
        #if '\n' in nTail:
        #print( "VerseViewXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
        #nTail = nTail.replace( '\n', ' ' )
        #vText += nTail
        #for sub2element in subelement:
        #if sub2element.tag == VerseViewXMLBible.styleTag:
        #sub2location = "style in " + sublocation
        #BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fyt4' )
        #fs = css = idStyle = None
        #for attrib,value in sub2element.items():
        #if attrib=='fs': fs = value
        ##elif attrib=="css": css = value
        ##elif attrib=="id": idStyle = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style sub2element".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert fs or css or idStyle
        #SFM = None
        #if fs == 'italic': SFM = '\\it'
        #elif fs == 'super': SFM = '\\bdit'
        #elif fs == 'emphasis': SFM = '\\em'
        #else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
        ##if css == "font-style:italic": SFM = '\\it'
        ##elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
        ##elif css == "color:#FF0000": SFM = '\\em'
        ##elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
        ##elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
        ##else: print( "css is", css, "idStyle is", idStyle ); halt
        #sText, sTail = sub2element.text.strip(), sub2element.tail
        #if BibleOrgSysGlobals.debugFlag: assert sText
        #if SFM: vText += SFM+' ' + sText + SFM+'*'
        #else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
        #if sTail: vText += sTail.strip()
        #else: logging.error( "df20 Expected to find {} but got {!r} in {}".format( VerseViewXMLBible.styleTag, sub2element.tag, sublocation ) )

        #elif subelement.tag == VerseViewXMLBible.styleTag:
        #sublocation = "style in " + location
        #BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' )
        #fs = css = idStyle = None
        #for attrib,value in subelement.items():
        #if attrib=="fs": fs = value
        ##elif attrib=="css": css = value
        ##elif attrib=="id": idStyle = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert fs
        #SFM = None
        #if fs == 'super': SFM = '\\bdit'
        #elif fs == 'emphasis': SFM = '\\em'
        #else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
        ##if css == "font-style:italic": SFM = '\\it'
        ##elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
        ##elif css == "color:#FF0000": SFM = '\\em'
        ##elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
        ##elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
        ##else: print( "css is", css, "idStyle is", idStyle ); halt
        #sText, sTail = subelement.text.strip(), subelement.tail
        #if BibleOrgSysGlobals.debugFlag: assert sText
        ##print( BBB, chapterNumber, sublocation )
        #if SFM: vText += SFM+' ' + sText + SFM+'*'
        #else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
        #if sTail: vText += sTail.strip()

        #elif subelement.tag == VerseViewXMLBible.breakTag:
        #sublocation = "line break in " + location
        #BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
        #BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
        #art = None
        #for attrib,value in subelement.items():
        #if attrib=="art":
        #art = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl'
        ##print( BBB, chapterNumber, verseNumber )
        ##assert vText
        #if vText:
        #thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
        #vText = ''
        #thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' )
        ##bTail = subelement.tail
        ##if bTail: vText = bTail.strip()
        #else: logging.error( "bd47 Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

        if vText:  # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print(
                    "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                    .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.addLine('v', verseNumber + ' ' + vText)
            verseNumber = None
Ejemplo n.º 26
0
    def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ):
        """
        Check/validate and extract verse data from the given XML verse element
            finding and saving the verse text, with notes and styled portions
            converted to backslash markers, via thisBook.addLine().

        Parameters:
            BBB, chapterNumber: only used to build location strings and messages.
            thisBook: receives the extracted lines through addLine( marker, text ).
            verse: the XML verse element (accessed through .items(), .text
                and iteration over its subelements).

        Lines added to thisBook:
            'v': the verse number (from the 'vnumber' attribute), a space, and the
                text that precedes the first line break (only if there is any text).
            'm': one line per line break element, holding the text that follows it.

        Returns None.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse…") )

        location = "verse in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' )

        # Handle verse attributes
        verseNumber = None
        for attrib,value in verse.items():
            if attrib=="vnumber":
                verseNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert verseNumber
        location = "{}:{}".format( location, verseNumber ) # Get a better location description

        # The same 'fs' values are accepted for styles in the verse text and inside notes
        styleMarkers = { 'italic':'\\it', 'super':'\\bdit', 'emphasis':'\\em' }

        def convertStyle( styleElement, styleLocation, checkCode, description ):
            """
            Return the marked-up text for one style element:
                the opening marker, the stripped element text, the closing marker,
                and then the stripped tail text (if any).

            checkCode is passed on to checkXMLNoSubelements and
                description is only used in the unprocessed attribute warning.
            """
            BibleOrgSysGlobals.checkXMLNoSubelements( styleElement, styleLocation, checkCode )
            # Only the 'fs' attribute is interpreted -- css and idStyle are never set
            #   and are only kept for the diagnostic print below
            fs = css = idStyle = None
            for attrib,value in styleElement.items():
                if attrib=='fs': fs = value
                else: logging.warning( "Unprocessed {!r} attribute ({}) in {}".format( attrib, value, description ) )
            if BibleOrgSysGlobals.debugFlag: assert fs
            SFM = styleMarkers.get( fs )
            if SFM is None:
                # NOTE(review): 'halt' is not defined in the visible code -- presumably
                #   it's a deliberate NameError to stop on an unhandled style -- confirm
                print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
            sText = ( styleElement.text or '' ).strip() # An empty element has text of None
            if BibleOrgSysGlobals.debugFlag: assert sText
            styledText = SFM+' ' + sText + SFM+'*'
            if styleElement.tail: styledText += styleElement.tail.strip()
            return styledText
        # end of convertStyle

        def saveLine( marker, text ):
            """
            Save one accumulated line in thisBook.
            A 'v' line gets the verse number prepended and is skipped if there's no text.
            Any other line is always saved (even if it's empty).
            """
            if marker == 'v':
                if text: thisBook.addLine( 'v', verseNumber + ' ' + text )
            else: thisBook.addLine( marker, text )
        # end of saveLine

        # vText accumulates the text of the line currently being built and
        #   lineMarker is the marker which that line will be saved with.
        # An empty vText happens if a verse starts immediately with a style or note
        vText = '' if verse.text is None else verse.text.strip()
        lineMarker = 'v'

        # Handle verse subelements (notes, styled portions and line breaks)
        for subelement in verse:
            if subelement.tag == HaggaiXMLBible.noteTag:
                sublocation = "note in " + location
                noteType = None
                for attrib,value in subelement.items():
                    if attrib=="type": noteType = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in note subelement".format( attrib, value ) )
                if noteType and noteType not in ('variant',):
                    logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
                nText = subelement.text or '' # A note starting with a style has text of None
                # Styled portions are children of the note, so they have to be
                #   collected before the footnote is closed and before the note tail
                #   (which is verse text following the note) is appended
                for subsubelement in subelement:
                    if subsubelement.tag == HaggaiXMLBible.styleTag:
                        nText += convertStyle( subsubelement, "style in " + sublocation, 'fyt4', "style subsubelement" )
                    else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) )
                if noteType: vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText )
                else: vText += "\\f + \\ft {}\\f*".format( nText )
                nTail = subelement.tail
                if nTail:
                    if '\n' in nTail:
                        print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
                        nTail = nTail.replace( '\n', ' ' )
                    vText += nTail

            elif subelement.tag == HaggaiXMLBible.styleTag:
                vText += convertStyle( subelement, "style in " + location, 'f5gh', "style subelement" )

            elif subelement.tag == HaggaiXMLBible.breakTag:
                sublocation = "line break in " + location
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
                art = None
                for attrib,value in subelement.items():
                    if attrib=="art":
                        art = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in line break subelement".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl'
                # Finish the current line and start an 'm' line with the text after the break.
                # The 'm' line is only saved at the next break (or at the end of the verse)
                #   so that any following notes or styled portions are included in it
                #   (rather than being prefixed with the already-used verse number).
                saveLine( lineMarker, vText )
                lineMarker = 'm'
                vText = subelement.tail.strip() if subelement.tail else ''
            else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

        # Save whatever is left
        if lineMarker == 'v' and '\n' in vText: # This is the main text of the verse
            print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
            vText = vText.replace( '\n', ' ' )
        saveLine( lineMarker, vText )
Ejemplo n.º 27
0
    def load( self, filename, folder=None, encoding='utf-8' ):
        """
        Load a single source USX XML file and extract the information.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("load( {}, {}, {} )").format( filename, folder, encoding ) )

        def loadParagraph( paragraphXML, paragraphlocation ):
            """
            Load a paragraph from the USX XML.
            In this context, paragraph means heading and intro lines,
                as well as paragraphs of verses.

            Uses (and updates) C,V information from the containing function.
            """
            nonlocal C, V

            # Process the attributes first
            paragraphStyle = None
            for attrib,value in paragraphXML.items():
                if attrib=='style':
                    paragraphStyle = value # This is basically the USFM marker name
                else:
                    logging.warning( _("CH46 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )

            # Now process the paragraph text (or write a paragraph marker anyway)
            paragraphText = paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else ''
            if version is None: paragraphText = paragraphText.rstrip() # Don't need to strip extra spaces in v2
            self.addLine( paragraphStyle, paragraphText )

            # Now process the paragraph subelements
            for element in paragraphXML:
                location = element.tag + ' ' + paragraphlocation
                #print( "USXXMLBibleBook.load", C, V, element.tag, location )
                if element.tag == 'verse': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    verseStyle = altNumber = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            V = value
                        elif attrib=='style':
                            verseStyle = value
                        elif attrib=='altnumber':
                            altNumber = value
                        else:
                            logging.error( _("KR60 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if verseStyle != 'v':
                        logging.error( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                    #if altNumber: print( repr(verseStyle), repr(altNumber) ); halt
                    altStuff = ' \\va {}\\va*'.format( altNumber ) if altNumber else ''
                    self.addLine( verseStyle, V + altStuff + ' ' )
                    # Now process the tail (if there's one) which is the verse text
                    if element.tail:
                        vText = element.tail
                        if vText[0]=='\n': vText = vText.lstrip() # Paratext puts cross references on a new line
                        if vText:
                            #print( repr(vText) )
                            self.appendToLastLine( vText )
                elif element.tag == 'char':
                    # Process the attributes first
                    charStyle = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            charStyle = value # This is basically the USFM character marker name
                            #print( "  charStyle", charStyle )
                            assert not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle )
                        else:
                            logging.error( _("QU52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    charLine = "\\{} {} ".format( charStyle, element.text )
                    # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( '{} {}:{} {}'.format( self.BBB, C, V, element.tag ) )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            subCharStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style': subCharStyle = value
                                elif attrib=='closed':
                                    assert value=='false'
                                    charClosed = False
                                else:
                                    logging.error( _("KS41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            charLine += "\\{} {}".format( subCharStyle, subelement.text )
                            if charClosed: charLine += "\\{}*".format( subCharStyle )
                            #if subelement.tail is not None: print( "  tail1", repr(subelement.tail) )
                            charLine += '' if subelement.tail is None else subelement.tail
                        else:
                            logging.error( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                            self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                    # A character field must be added to the previous field
                    #if element.tail is not None: print( " tail2", repr(element.tail) )
                    charTail = ''
                    if element.tail:
                        charTail = element.tail
                        if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts footnote parts on new lines
                    charLine += "\\{}*{}".format( charStyle, charTail )
                    #if debuggingThisModule: print( "USX.loadParagraph:", C, V, paragraphStyle, charStyle, repr(charLine) )
                    self.appendToLastLine( charLine )
                elif element.tag == 'note':
                    #print( "NOTE", BibleOrgSysGlobals.elementStr( element ) )
                    # Process the attributes first
                    noteStyle = noteCaller = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            noteStyle = value # This is basically the USFM marker name
                            assert noteStyle in ('x','f',)
                        elif attrib=='caller': noteCaller = value # Usually hyphen or a symbol to be used for the note
                        else:
                            logging.error( _("CY38 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if noteCaller=='' and self.BBB=='NUM' and C=='10' and V=='36': noteCaller = '+' # Hack
                    assert noteStyle and noteCaller # both compulsory
                    noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                    if element.text:
                        noteText = element.text.strip()
                        noteLine += noteText
                    # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( C, V, subelement.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            # Process the attributes first
                            charStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style':
                                    charStyle = value
                                elif attrib=='closed':
                                    assert value=='false'
                                    charClosed = False
                                else:
                                    logging.warning( _("GJ67 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            noteLine += "\\{} {}".format( charStyle, subelement.text )
                            # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                            for sub2element in subelement:
                                sub2location = sub2element.tag + ' ' + sublocation
                                #print( C, V, sub2element.tag )
                                if sub2element.tag == 'char': # milestone (not a container)
                                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location )
                                    # Process the attributes first
                                    char2Style, char2Closed = None, True
                                    for attrib,value in sub2element.items():
                                        if attrib=='style':
                                            char2Style = value
                                        elif attrib=='closed':
                                            assert value=='false'
                                            char2Closed = False
                                        else:
                                            logging.warning( _("VH36 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                                    assert char2Closed
                                    noteLine += "\\{} {}\\{}*{}".format( char2Style, sub2element.text, char2Style, sub2element.tail if sub2element.tail else '' )
                            if charClosed: noteLine += "\\{}*".format( charStyle )
                            if subelement.tail:
                                charTail = subelement.tail
                                if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts cross reference parts on a new line
                                noteLine += charTail
                        elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                            BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            unmmatchedMarker = None
                            for attrib,value in subelement.items():
                                if attrib=='marker':
                                    unmmatchedMarker = value
                                else:
                                    logging.warning( _("NV21 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            self.addPriorityError( 2, C, V, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                            self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                        if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                    #noteLine += "\\{}*".format( charStyle )
                    noteLine += "\\{}*".format( noteStyle )
                    if element.tail:
                        #if '\n' in element.tail: halt
                        noteTail = element.tail
                        if noteTail[0]=='\n': noteTail = noteTail.lstrip() # Paratext puts multiple cross-references on new lines
                        noteLine += noteTail
                    #print( "NoteLine", repr(noteLine) )
                    self.appendToLastLine( noteLine )
                elif element.tag == 'link': # Used to include extra resources
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    linkStyle = linkDisplay = linkTarget = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            linkStyle = value
                            assert linkStyle in ('jmp',)
                        elif attrib=='display':
                            linkDisplay = value # e.g., "click here"
                        elif attrib=='target':
                            linkTarget = value # e.g., some reference
                        else:
                            logging.warning( _("KW54 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    self.addPriorityError( 3, C, V, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )
                elif element.tag == 'unmatched': # Used to denote errors in the source text
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    self.addPriorityError( 2, C, V, _("Unmatched element in {}").format( location) )
                else:
                    logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, location ) )
                    self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) )
                    for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] )
                    if BibleOrgSysGlobals.debugFlag: halt
        # end of loadParagraph

        C = V = '0'
        loadErrors = []
        lastMarker = None

        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  " + _("Loading {} from {}…").format( filename, folder ) )
        elif BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {}…").format( filename ) )
        self.isOneChapterBook = self.BBB in BibleOrgSysGlobals.BibleBooksCodes.getSingleChapterBooksList()
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError as err:
            logging.critical( exp("Loader parse error in xml file {}: {} {}").format( filename, sys.exc_info()[0], err ) )
            loadErrors.append( exp("Loader parse error in xml file {}: {} {}").format( filename, sys.exc_info()[0], err ) )
            self.addPriorityError( 100, C, V, _("Loader parse error in xml file {}: {}").format( filename, err ) )
        if BibleOrgSysGlobals.debugFlag: assert len ( self.tree ) # Fail here if we didn't load anything at all

        # Find the main container
        if 'tree' in dir(self) \
        and ( self.tree.tag=='usx' or self.tree.tag=='usfm' ): # Not sure why both are allowable
            location = "USX ({}) file".format( self.tree.tag )
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location )

            # Process the attributes first
            self.schemaLocation = ''
            version = None
            for attrib,value in self.tree.items():
                if attrib=='version': version = value
                else: logging.warning( _("DG84 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            if version not in ( None, '2.0' ):
                logging.warning( _("Not sure if we can handle v{} USX files").format( version ) )

            # Now process the data
            for element in self.tree:
                sublocation = element.tag + " " + location
                if element.tag == 'book': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    # Process the attributes
                    idField = bookStyle = None
                    for attrib,value in element.items():
                        if attrib=='id' or attrib=='code':
                            idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            #if idField != BBB:
                            #    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        elif attrib=='style':
                            bookStyle = value
                        else:
                            logging.warning( _("MD12 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                    if bookStyle != 'id':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( bookStyle, sublocation ) )
                    idLine = idField
                    if element.text and element.text.strip(): idLine += ' ' + element.text
                    self.addLine( 'id', idLine )
                elif element.tag == 'chapter': # milestone (not a container)
                    V = '0'
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                    # Process the attributes
                    chapterStyle = pubNumber = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            C = value
                        elif attrib=='style':
                            chapterStyle = value
                        elif attrib=='pubnumber':
                            pubNumber = value
                        else:
                            logging.error( _("LY76 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                    if chapterStyle != 'c':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( chapterStyle, sublocation ) )
                    #if pubNumber: print( self.BBB, C, repr(pubNumber) ); halt
                    self.addLine( 'c', C )
                    if pubNumber: self.addLine( 'cp', pubNumber )
                elif element.tag == 'para':
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    USFMMarker = element.attrib['style'] # Get the USFM code for the paragraph style
                    if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( USFMMarker ):
                        #if lastMarker: self.addLine( lastMarker, lastText )
                        #lastMarker, lastText = USFMMarker, text
                        loadParagraph( element, sublocation )
                    elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( USFMMarker ): # the line begins with an internal USFM Marker -- append it to the previous line
                        text = element.text
                        if text is None: text = ''
                        if BibleOrgSysGlobals.debugFlag:
                            print( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                            #halt # Not checked yet
                        if text:
                            loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                            logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, C, V, text ) )
                        else: # no text
                            loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM Marker at beginning of line (with no text)").format( self.BBB, C, V, USFMMarker ) )
                            logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, C, V ) )
                        self.addPriorityError( 97, C, V, _("Found \\{} internal USFM Marker on new line in file").format( USFMMarker ) )
                        #lastText += '' if lastText.endswith(' ') else ' ' # Not always good to add a space, but it's their fault!
                        lastText =  '\\' + USFMMarker + ' ' + text
                        #print( "{} {} {} Now have {}:{!r}".format( self.BBB, C, V, lastMarker, lastText ) )
                    else: # the line begins with an unknown USFM Marker
                        try: status = element.attrib['status']
                        except KeyError: status = None
                        text = element.text
                        if text:
                            loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line with text: {}").format( self.BBB, C, V, USFMMarker, text ) )
                            logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, C, V, text ) )
                        else: # no text
                            loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line (with no text").format( self.BBB, C, V, USFMMarker ) )
                            logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, C, V ) )
                        self.addPriorityError( 100, C, V, _("Found \\{} unknown USFM Marker on new line in file").format( USFMMarker ) )
                        if status == 'unknown': # USX exporter already knew it was a bad marker
                            pass # Just drop it completely
                        else:
                            for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                                if USFMMarker.startswith( tryMarker ): # Let's try changing it
                                    if lastMarker: self.addLine( lastMarker, lastText )
                                    lastMarker, lastText = tryMarker, USFMMarker[len(tryMarker):] + ' ' + text
                                    loadErrors.append( _("{} {}:{} Changed '\\{}' unknown USFM Marker to {!r} at beginning of line: {}").format( self.BBB, C, V, USFMMarker, tryMarker, text ) )
                                    logging.warning( _("Changed '\\{}' unknown USFM Marker to {!r} after {} {}:{} at beginning of line: {}").format( USFMMarker, tryMarker, self.BBB, C, V, text ) )
                                    break
                        # Otherwise, don't bother processing this line -- it'll just cause more problems later on
                else:
                    logging.error( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, sublocation ) )
                    self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) )

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
Ejemplo n.º 28
0
    def __validateAndExtractBook(self, book):
        """
        Validate one OpenSong XML book element and load the book it describes.

        The book name comes from the element's 'n' attribute.  It is turned
        into a BBB code using self.genericBOS first and, if that finds
        nothing, the module-level BibleBooksNames (loaded on first need).
        Each chapter subelement is handed to __validateAndExtractChapter and
        the finished book is passed to self.stashBook().

        Nothing is returned; problems with the XML are logged, not raised.
        """
        global BibleBooksNames

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book…"))

        # The only attribute understood on a book element is its name
        bookName = None
        for attrName, attrValue in book.items():
            if attrName == "n":
                bookName = attrValue
                continue
            logging.warning(
                "Unprocessed {!r} attribute ({}) in book element".format(
                    attrName, attrValue))
        if not bookName:
            logging.error(_("OpenSong load can't find a book name"))
            return

        # Booknames are usually in English, so try the generic system first
        BBB = self.genericBOS.getBBBFromText(bookName)
        if not BBB:  # wasn't English -- try non-English booknames
            if BibleBooksNames is None:
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            BBB = BibleBooksNames.getBBBFromText(bookName)
        if not BBB:
            logging.error(
                _("OpenSong load doesn't recognize book name: {!r}").format(
                    bookName))
            return

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Validating {} {}…").format(BBB, bookName))

        # Start the new book with its id and title lines
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'OpenSong XML Bible Book object'
        thisBook.objectTypeString = 'OpenSong'
        usfmCode = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(BBB)
        idText = '{} imported by {}'.format(usfmCode.upper(), ProgNameVersion)
        thisBook.addLine('id', idText)
        for titleMarker in ('h', 'mt1'):
            thisBook.addLine(titleMarker, bookName)

        # Everything directly inside a book should be a chapter
        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.chapterTag, child.tag))
                continue
            sublocation = "chapter in {}".format(BBB)
            BibleOrgSysGlobals.checkXMLNoText(child, sublocation, 'j3jd')
            BibleOrgSysGlobals.checkXMLNoTail(child, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, child)

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  Saving {} into results…".format(BBB))
        self.stashBook(thisBook)
Ejemplo n.º 29
0
    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Validate one OpenSong XML chapter element and add its content to thisBook.

        Parameters:
            BBB: book code; used here only for log messages and locations.
            thisBook: book object that receives the lines through addLine().
            chapter: the XML chapter element.

        A 'c' line is added for the chapter number (the 'n' attribute) and a
        'v' line for each verse.  Verse text containing newlines is treated
        as poetry and split into 'q1' lines.  <i> subelements are converted
        to \\it ...\\it* character markers.

        Nothing is returned.  Problems in the XML (unknown attributes or
        elements, missing chapter/verse numbers, empty verses) are logged
        and the offending item is skipped where necessary.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating XML chapter…"))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                pass  # Recognised, but its value isn't used by this loader
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
            verseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib == "t":
                    pass  # Recognised, but its value isn't used by this loader
                else:
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in verse element".
                        format(attrib, value))
            if BibleOrgSysGlobals.debugFlag: assert verseNumber

            # Assemble the verse text from the element text and any <i> parts.
            # ElementTree gives None (not '') for absent text/tail, so every
            #   piece is defaulted to '' to keep a literal "None" out of the verse.
            vText = element.text or ''
            for subelement in element:
                sub2location = "{} in {}".format(subelement.tag, sublocation)
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    subelement, sub2location, 'ks03')
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    subelement, sub2location, 'ks05')
                if subelement.tag == 'i':
                    vText += '\\it {}\\it*{}'.format(
                        subelement.text or '', subelement.tail or '')
                else:
                    logging.error(
                        "Expected to find 'i' but got {!r}".format(
                            subelement.tag))
            # NOTE(review): the verse's own tail is appended as before; in an
            #   indented file this may be just whitespace with a newline, which
            #   would trigger the poetry branch below -- confirm this is intended.
            vText += element.tail or ''

            if not vText:
                logging.warning("{} {}:{} has no text".format(
                    BBB, chapterNumber, verseNumber))
                continue
            if verseNumber is None:
                # Can't build a 'v' line without a number (this used to
                #   raise a TypeError on the string concatenation below)
                logging.error(
                    "Missing 'n' attribute in verse element in {} {}".format(
                        BBB, chapterNumber))
                continue

            # This is the main text of the verse
            if '\n' in vText:  # This is how they represent poetry
                for j, textBit in enumerate(vText.split('\n')):
                    if j == 0:
                        thisBook.addLine('q1', '')
                        thisBook.addLine('v', verseNumber + ' ' + textBit)
                    else:
                        thisBook.addLine('q1', textBit)
            else:  # Just one verse line
                thisBook.addLine('v', verseNumber + ' ' + vText)
Ejemplo n.º 30
0
    def writeOpenSongBook(writerObject, BBB, bkData):
        """
        Write one book as an OpenSong 'b' element to writerObject.

        Only chapter ('c') entries and verse entries ('v' followed by
        'v~'/'p~' text) produce output.  Titles, introductions, section
        headings and paragraph markers are recorded in ignoredMarkers, and
        anything else in unhandledMarkers.  If BBB has no OSIS abbreviation,
        the book is added to unhandledBooks and nothing is written.

        unhandledBooks, ignoredMarkers and unhandledMarkers are not defined
        here -- they are taken from the surrounding scope.
        """
        osisCode = BibleOrgSysGlobals.BibleBooksCodes.getOSISAbbreviation(BBB)
        if not osisCode:
            logging.warning(
                "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
                .format(BBB))
            unhandledBooks.append(BBB)
            return

        titleMarkers = ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4', 'ms1','ms2','ms3','ms4', )
        headingMarkers = ('s1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd', 'cl','lit', )
        blankLineMarkers = ('b', 'nb', 'ib', )
        verseTextMarkers = ('v~', 'p~', )

        writerObject.writeLineOpen('b', ('n', bkData.getAssumedBookNames()[0]))
        chapterIsOpen = False
        seenVerse = False
        pendingVP = None
        verseText = ""  # Text collected for the verse currently being built
        C, V = '-1', '-1'  # So first/id line starts at -1:0
        # NOTE(review): verseLabel is first bound by a 'v' entry that has text;
        #   after a 'v' entry without text it is unbound or left over from
        #   the previous verse.
        for entry in bkData._processedLines:  # Process internal Bible data lines
            marker = entry.getMarker()
            text = entry.getCleanText()
            extras = entry.getExtras()

            if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                continue  # Just ignore added markers -- not needed here
            if marker in USFM_PRECHAPTER_MARKERS:
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                    assert C == '-1' or marker == 'rem' or marker.startswith('mte')
                V = str(int(V) + 1)

            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker == 'ie':
                ignoredMarkers.add(marker)  # Just ignore these lines
            elif marker == 'c':
                # Finish off the last verse of the previous chapter (if any)
                if verseText:
                    writerObject.writeLineOpenClose('v', verseText, ('n', verseLabel))
                    verseText = ''
                if chapterIsOpen:
                    writerObject.writeLineClose('c')
                C, V = text, '0'
                writerObject.writeLineOpen('c', ('n', text))
                chapterIsOpen = True
            elif marker == 'c#':  # Can safely be ignored for this export
                ignoredMarkers.add(marker)
            elif marker == 'vp#':  # Precedes a v field and has the verse number to be printed
                pendingVP = text  # Just remember it for now
            elif marker == 'v':
                V = text
                if pendingVP:  # this is the verse number to be published
                    text = pendingVP
                    pendingVP = None
                seenVerse = True
                # Write out the previous verse before starting this one
                if verseText:
                    writerObject.writeLineOpenClose('v', verseText, ('n', verseLabel))
                    verseText = ''
                if not text:
                    logging.warning("createOpenSongXML: Missing text for v")
                    continue
                # Remove anything that'll cause a big XML problem later
                cleanedNumber = text
                for troubleChar in '<>"':
                    cleanedNumber = cleanedNumber.replace(troubleChar, '')
                verseLabel = cleanedNumber
            elif marker in titleMarkers \
            or marker in USFM_ALL_INTRODUCTION_MARKERS \
            or marker in headingMarkers:
                ignoredMarkers.add(marker)
            elif marker in USFM_BIBLE_PARAGRAPH_MARKERS or marker in blankLineMarkers:
                if BibleOrgSysGlobals.debugFlag: assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in verseTextMarkers:
                if BibleOrgSysGlobals.debugFlag: assert text or extras
                if not text:  # this is an empty (untranslated) verse
                    text = '- - -'  # but we'll put in a filler
                if seenVerse:
                    safeText = BibleOrgSysGlobals.makeSafeXML(text)
                    if verseText:
                        verseText = verseText + ' ' + safeText
                    else:
                        verseText = verseText + safeText
            else:
                if text:
                    logging.warning(
                        "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                        format(marker, BBB, C, V, text))
                if extras:
                    logging.warning(
                        "toOpenSong: lost extras in {} field in {} {}:{}".
                        format(marker, BBB, C, V))
                unhandledMarkers.add(marker)

            extrasWereDropped = bool(extras) \
                and marker not in verseTextMarkers \
                and marker not in ignoredMarkers
            if extrasWereDropped:
                logging.critical(
                    "toOpenSong: extras not handled for {} at {} {}:{}".format(
                        marker, BBB, C, V))

        # Close whatever is still open at the end of the book
        if verseText:
            writerObject.writeLineOpenClose('v', verseText, ('n', verseLabel))
        if chapterIsOpen:
            writerObject.writeLineClose('c')
        writerObject.writeLineClose('b')
    def loadSystems(self, folder=None):
        """
        Load and pre-process the specified booksNames systems.

        Every file in folder whose extension is .xml (in any case) and whose
        name starts with the filename base plus an underscore is parsed.
        The rest of the name becomes the system code, and the part of that
        code before its first underscore becomes the language code.  The
        header element is split off from each tree and its version, date
        and title are saved.  Each system is also validated if strict
        checking is enabled.

        Nothing is reloaded if systems have already been loaded.

        Parameters:
            folder: folder to search.  Defaults to DataFiles/BookNames
                beside this module.

        Returns self.
        """
        if self.__XMLSystems:  # Only ever do this once
            return self

        if folder is None:
            folder = os.path.join(
                os.path.dirname(__file__), "DataFiles",
                "BookNames")  # Relative to module, not cwd
        self.__XMLFolder = folder
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading book names systems from {}…").format(folder))

        filenamePrefix = self.__filenameBase.upper() + "_"
        for filename in os.listdir(folder):
            filepart, extension = os.path.splitext(filename)
            if extension.upper() != '.XML' \
            or not filepart.upper().startswith(filenamePrefix):
                continue  # Not one of our data files

            booksNamesSystemCode = filepart[len(self.__filenameBase) + 1:]
            if BibleOrgSysGlobals.verbosityLevel > 3:
                print(
                    _("Loading {} books names system from {}…").format(
                        booksNamesSystemCode, filename))
            # Register the (still empty) entry before parsing, as before
            system = self.__XMLSystems[booksNamesSystemCode] = {}
            system["languageCode"] = booksNamesSystemCode.split('_', 1)[0]
            tree = system['tree'] = ElementTree().parse(
                os.path.join(folder, filename))
            # Fail here if we didn't load anything at all.  The child count is
            #   tested explicitly because truth-testing an Element is deprecated.
            assert len(tree) > 0

            # Check and remove the header element
            if tree.tag == self.XMLTreeTag:
                header = tree[0]
                if header.tag == self.headerTag:
                    system["header"] = header
                    tree.remove(header)
                    BibleOrgSysGlobals.checkXMLNoText(header, "header")
                    BibleOrgSysGlobals.checkXMLNoTail(header, "header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(header, "header")
                    if len(header) > 1:
                        logging.info(_("Unexpected elements in header"))
                    elif len(header) == 0:
                        logging.info(_("Missing work element in header"))
                    else:
                        work = header[0]
                        BibleOrgSysGlobals.checkXMLNoText(
                            work, "work in header")
                        BibleOrgSysGlobals.checkXMLNoTail(
                            work, "work in header")
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            work, "work in header")
                        if work.tag == "work":
                            for fieldName in ('version', "date", "title"):
                                system[fieldName] = work.find(fieldName).text
                        else:
                            logging.warning(
                                _("Missing work element in header"))
                else:
                    logging.warning(
                        _("Missing header element (looking for {!r} tag)"
                          ).format(self.headerTag))
            else:
                logging.error(
                    _("Expected to load {!r} but got {!r}").format(
                        self.XMLTreeTag, tree.tag))

            # Whatever is left under the root (header removed) is counted as books
            bookCount = len(tree)
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print(
                    _("    Loaded {} books for {}").format(
                        bookCount, booksNamesSystemCode))
            logging.info(
                _("    Loaded {} books for {}").format(
                    bookCount, booksNamesSystemCode))

            if BibleOrgSysGlobals.strictCheckingFlag:
                self.__validateSystem(booksNamesSystemCode)
        return self
Ejemplo n.º 32
0
def createMySwordModule(self, outputFolder, controlDict):
    """
    Create a SQLite3 database module for the program MySword.

    self here is a Bible object with _processedLines

    Parameters:
        outputFolder: folder in which the module file is written.
        controlDict: export settings; 'MySwordOutputFilename' (if present)
            supplies the output filename.

    The database gets a Details table (the module settings) and a Bible
    table (one row per non-blank verse), with the books written in the order
    given by the GENERIC-KJV-66-ENG organisational system.  Any existing
    file of the same name is replaced.  Once the database has been closed,
    a gzipped tar of it is written beside it with '.gz' appended to the name.

    Returns True.
    """
    import tarfile
    from InternalBibleInternals import BOS_ADDED_NESTING_MARKERS, BOS_NESTING_MARKERS
    from theWordBible import theWordOTBookLines, theWordNTBookLines, theWordBookLines, theWordHandleIntroduction, theWordComposeVerseLine

    def writeMSBook(sqlObject, BBB, ourGlobals):
        """
        Writes a book to the MySword sqlObject file.

        Steps through every chapter/verse that the organisational system
        expects for BBB, composing one line per verse.  Writing stays one
        line behind, so each row is inserted after the following verse has
        been composed.  Books not in self.books write nothing.
        """
        nonlocal lineCount
        bkData = self.books[BBB] if BBB in self.books else None
        verseList = BOS.getNumVersesList(BBB)
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        numC, numV = len(verseList), verseList[0]

        # Handle some common versification anomalies: the text of the given
        #   following verse (if the source has it) is added to these verses
        followingVerseToAppend = {
            ('JN3', 1, 14): '15',
            ('REV', 12, 17): '18',
        }

        ourGlobals['line'], ourGlobals['lastLine'] = '', None
        for piKey in ('pi1', 'pi2', 'pi3', 'pi4', 'pi5', 'pi6', 'pi7'):
            ourGlobals[piKey] = False
        if bkData:
            # Write book headings (stuff before chapter 1)
            ourGlobals['line'] = theWordHandleIntroduction(
                BBB, bkData, ourGlobals)

            # Write the verses
            C = V = 1
            ourGlobals['lastLine'] = ourGlobals['lastBCV'] = None
            while True:
                verseData = None
                try:
                    verseData, context = bkData.getContextVerseData(
                        (BBB, str(C), str(V)))
                except KeyError:  # Missing verses
                    logging.warning(
                        "BibleWriter.createMySwordModule: missing source verse at {} {}:{}"
                        .format(BBB, C, V))

                extraV = followingVerseToAppend.get((BBB, C, V))
                if extraV is not None:
                    try:
                        extraData, extraContext = bkData.getContextVerseData(
                            (BBB, str(C), extraV))
                    except KeyError:
                        pass  # The source doesn't have that verse -- just ignore it
                    else:
                        # The main verse itself may have been missing above,
                        #   in which case there's nothing to extend
                        if verseData is None:
                            verseData = extraData
                        else:
                            verseData.extend(extraData)

                composedLine = ''
                if verseData:
                    composedLine = theWordComposeVerseLine(
                        BBB, C, V, verseData, ourGlobals)
                # Stay one line behind (because paragraph indicators get appended to the previous line)
                if ourGlobals['lastBCV'] is not None \
                and ourGlobals['lastLine']: # don't bother writing blank (unfinished?) verses
                    lastB, lastC, lastV = ourGlobals['lastBCV']
                    sqlObject.execute(
                        'INSERT INTO "Bible" VALUES(?,?,?,?)',
                        (lastB, lastC, lastV, ourGlobals['lastLine']))
                    lineCount += 1
                ourGlobals['lastLine'] = composedLine
                ourGlobals['lastBCV'] = (nBBB, C, V)

                # Step to the next verse (or the first verse of the next chapter)
                V += 1
                if V > numV:
                    C += 1
                    if C > numC:
                        break
                    numV = verseList[C - 1]
                    V = 1

        # Write the last line of the file
        if ourGlobals['lastLine']:  # don't bother writing blank (unfinished?) verses
            lastB, lastC, lastV = ourGlobals['lastBCV']
            sqlObject.execute(
                'INSERT INTO "Bible" VALUES(?,?,?,?)',
                (lastB, lastC, lastV, ourGlobals['lastLine']))
            lineCount += 1

    # end of createMySwordModule.writeMSBook

    # Set-up their Bible reference system
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    # Try to figure out if it's an OT/NT or what (allow for up to 4 extra books like FRT,GLS, etc.)
    # NOTE: booksExpected, textLineCountExpected and checkTotals are set here
    #   but are not used by any check later in this function
    if len(self) <= (39 + 4) and self.containsAnyOT39Books(
    ) and not self.containsAnyNT27Books():
        testament, startBBB, endBBB = 'OT', 'GEN', 'MAL'
        booksExpected, textLineCountExpected, checkTotals = 39, 23145, theWordOTBookLines
    elif len(self) <= (27 + 4) and self.containsAnyNT27Books(
    ) and not self.containsAnyOT39Books():
        testament, startBBB, endBBB = 'NT', 'MAT', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 27, 7957, theWordNTBookLines
    else:  # assume it's an entire Bible
        testament, startBBB, endBBB = 'BOTH', 'GEN', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 66, 31102, theWordBookLines
    extension = '.bbl.mybible'

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("  Exporting to MySword format…"))
    mySettings = {}
    mySettings['unhandledMarkers'] = set()
    handledBooks = []

    # Use the first filename source that has a value
    if 'MySwordOutputFilename' in controlDict:
        filename = controlDict['MySwordOutputFilename']
    else:
        filename = self.sourceFilename or self.shortName \
            or self.abbreviation or self.name or 'export'
    if not filename.endswith(extension):
        filename += extension  # Make sure that we have the right file extension
    filepath = os.path.join(outputFolder,
                            BibleOrgSysGlobals.makeSafeFilename(filename))
    if os.path.exists(filepath): os.remove(filepath)
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print('  createMySwordModule: ' + _("Writing {!r}…").format(filepath))

    conn = sqlite3.connect(filepath)
    try:
        cursor = conn.cursor()

        # First write the settings Details table
        exeStr = 'CREATE TABLE Details(Description NVARCHAR(255), Abbreviation NVARCHAR(50), Comments TEXT, Version TEXT, VersionDate DATETIME, PublishDate DATETIME, RightToLeft BOOL, OT BOOL, NT BOOL, Strong BOOL'  # incomplete
        customCSS = self.getSetting('CustomCSS')
        if customCSS: exeStr += ', CustomCSS TEXT'
        exeStr += ')'
        cursor.execute(exeStr)

        # The values must be appended in the same order as the columns above
        values = []

        description = self.getSetting('Description')
        if not description: description = self.getSetting('description')
        if not description: description = self.name
        values.append(description)

        if self.abbreviation: abbreviation = self.abbreviation
        else: abbreviation = self.getSetting('WorkAbbreviation')
        if not abbreviation: abbreviation = self.name[:3].upper()
        values.append(abbreviation)

        for settingName in ('Comments', 'Version', 'VersionDate',
                            'PublishDate', 'RightToLeft'):
            values.append(self.getSetting(settingName))

        values.append(testament in ('OT', 'BOTH'))
        values.append(testament in ('NT', 'BOTH'))

        Strong = self.getSetting('Strong')
        values.append(Strong if Strong else False)

        if customCSS: values.append(customCSS)

        exeStr = 'INSERT INTO "Details" VALUES(' + ','.join('?' * len(values)) + ')'
        cursor.execute(exeStr, values)

        # Now create and fill the Bible table
        cursor.execute(
            'CREATE TABLE Bible(Book INT, Chapter INT, Verse INT, Scripture TEXT, Primary Key(Book,Chapter,Verse))'
        )
        conn.commit()  # save (commit) the changes
        BBB, lineCount = startBBB, 0
        while True:  # Write each Bible book in the KJV order
            writeMSBook(cursor, BBB, mySettings)
            conn.commit()  # save (commit) the changes
            handledBooks.append(BBB)
            if BBB == endBBB: break
            BBB = BOS.getNextBookCode(BBB)
        conn.commit()  # save (commit) the changes
        cursor.close()
    finally:
        # Close the database (even after an error), so that the file is
        #   no longer open when it's compressed below
        conn.close()

    if mySettings['unhandledMarkers']:
        logging.warning(
            "BibleWriter.createMySwordModule: Unhandled markers were {}".
            format(mySettings['unhandledMarkers']))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  " +
                  _("WARNING: Unhandled createMySwordModule markers were {}"
                    ).format(mySettings['unhandledMarkers']))
    unhandledBooks = [
        bookCode for bookCode in self.getBookList()
        if bookCode not in handledBooks
    ]
    if unhandledBooks:
        logging.warning("createMySwordModule: Unhandled books were {}".format(
            unhandledBooks))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  " +
                  _("WARNING: Unhandled createMySwordModule books were {}"
                    ).format(unhandledBooks))

    # Now create the gzipped file
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("  Compressing {} MySword file…".format(filename))
    with tarfile.open(filepath + '.gz', 'w:gz') as tar:  # closed even if add() fails
        tar.add(filepath)

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print("  BibleWriter.createMySwordModule finished successfully.")
    return True
Ejemplo n.º 33
0
    def load( self, filename, folder=None, encoding='utf-8' ):
        """
        Load a single source USX XML file and extract the information.

        filename is the USX file to parse. If folder is given, the two are
            joined to make the path that is actually read.
        encoding is accepted but is not used in this method (the XML parser
            is simply handed the file path).

        Recognised book/chapter/para elements are converted into internal
            lines with self.addLine() and self.appendToLastLine().
        Problems are logged and recorded with self.addPriorityError(), and any
            accumulated load errors are saved in self.errorDictionary['Load Errors'].
        """

        def loadParagraph( paragraphXML, paragraphlocation ):
            """ Load a paragraph from the USX XML.
                Uses (and updates) c,v information from the containing function. """
            nonlocal c, v

            # Process the attributes first
            paragraphStyle = None
            for attrib,value in paragraphXML.items():
                if attrib=='style':
                    paragraphStyle = value # This is basically the USFM marker name
                else:
                    # Must report paragraphlocation here -- the per-element 'location' is only set in the loop below
                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, paragraphlocation ) )

            # Now process the paragraph text (or write a paragraph marker anyway)
            self.addLine( paragraphStyle, paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else '' )

            # Now process the paragraph subelements
            for element in paragraphXML:
                location = element.tag + ' ' + paragraphlocation
                #print( "USXXMLBibleBook.load", c, v, element.tag, location )
                if element.tag == 'verse': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    verseStyle = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            v = value
                        elif attrib=='style':
                            verseStyle = value
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if verseStyle != 'v':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                    self.addLine( verseStyle, v + ' ' )
                    # Now process the tail (if there's one) which is the verse text
                    if element.tail:
                        vText = element.tail.strip()
                        if vText:
                            #print( repr(vText) )
                            self.appendToLastLine( vText )
                elif element.tag == 'char':
                    # Process the attributes first
                    charStyle = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            charStyle = value # This is basically the USFM character marker name
                            #print( "  charStyle", charStyle )
                            assert( not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle ) )
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    # element.text is None when the char starts with a nested element -- don't emit the word 'None'
                    charLine = "\\{} {} ".format( charStyle, element.text or '' )
                    # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( c, v, element.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            subCharStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style': subCharStyle = value
                                elif attrib=='closed':
                                    assert( value=='false' )
                                    charClosed = False
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            charLine += "\\{} {}".format( subCharStyle, subelement.text or '' )
                            if charClosed: charLine += "\\{}*".format( subCharStyle )
                            charLine += '' if subelement.tail is None else subelement.tail.strip()
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                            self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                    # A character field must be added to the previous field
                    charLine += "\\{}*{}".format( charStyle, '' if element.tail is None else element.tail.strip() )
                    if debuggingThisModule: print( "USX.loadParagraph:", c, v, paragraphStyle, charStyle, repr(charLine) )
                    self.appendToLastLine( charLine )
                elif element.tag == 'note':
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    # Process the attributes first
                    noteStyle = noteCaller = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            noteStyle = value # This is basically the USFM marker name
                            assert( noteStyle in ('x','f',) )
                        elif attrib=='caller':
                            noteCaller = value # Usually hyphen or a symbol to be used for the note
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    assert( noteStyle and noteCaller ) # both compulsory
                    noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                    # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                    # pendingTail holds the tail of the final subelement if (and only if) its branch didn't already append it.
                    #   It also keeps us from touching an unbound/stale loop variable when the note has no subelements.
                    pendingTail = None
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        pendingTail = subelement.tail
                        #print( c, v, element.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            charStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style':
                                    charStyle = value
                                elif attrib=='closed':
                                    assert( value=='false' )
                                    charClosed = False
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            noteLine += "\\{} {}".format( charStyle, subelement.text or '' )
                            if charClosed: noteLine += "\\{}*".format( charStyle )
                            noteLine += '' if subelement.tail is None else subelement.tail.strip()
                            pendingTail = None # Already appended just above -- don't add it a second time after the loop
                        elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                            BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            unmmatchedMarker = None
                            for attrib,value in subelement.items():
                                if attrib=='marker':
                                    unmmatchedMarker = value
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            self.addPriorityError( 2, c, v, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                            self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                    if pendingTail and pendingTail.strip(): noteLine += pendingTail
                    #noteLine += "\\{}*".format( charStyle )
                    noteLine += "\\{}*".format( noteStyle )
                    if element.tail:
                        noteText = element.tail.strip()
                        noteLine += noteText
                    self.appendToLastLine( noteLine )
                elif element.tag == 'link': # Used to include extra resources
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    linkStyle = linkDisplay = linkTarget = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            linkStyle = value
                            assert( linkStyle in ('jmp',) )
                        elif attrib=='display':
                            linkDisplay = value # e.g., "click here"
                        elif attrib=='target':
                            linkTarget = value # e.g., some reference
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    self.addPriorityError( 3, c, v, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )
                elif element.tag == 'unmatched': # Used to denote errors in the source text
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    self.addPriorityError( 2, c, v, _("Unmatched element in {}").format( location) )
                else:
                    logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, location ) )
                    self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                    # Dump the last few stored lines to help locate the problem
                    for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] )
                    if BibleOrgSysGlobals.debugFlag: halt # Undefined name -- aborts the run when debugging
        # end of loadParagraph

        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {}...").format( filename ) )
        self.isOneChapterBook = self.BBB in BibleOrgSysGlobals.BibleBooksCodes.getSingleChapterBooksList()
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
        self.tree = ElementTree().parse( self.sourceFilepath )
        assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        c = v = '0'
        loadErrors = []
        lastMarker = None

        # Find the main container
        if self.tree.tag=='usx' or self.tree.tag=='usfm': # Not sure why both are allowable
            location = "USX ({}) file".format( self.tree.tag )
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location )

            # Process the attributes first
            self.schemaLocation = ''
            version = None
            for attrib,value in self.tree.items():
                if attrib=='version': version = value
                else: # Only complain about attributes that we really didn't handle
                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
            if version not in ( None, '2.0' ):
                logging.warning( _("Not sure if we can handle v{} USX files").format( version ) )

            # Now process the data
            for element in self.tree:
                sublocation = element.tag + " " + location
                if element.tag == 'book': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    # Process the attributes
                    idField = bookStyle = None
                    for attrib,value in element.items():
                        if attrib=='id' or attrib=='code':
                            idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            #if idField != BBB:
                            #    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        elif attrib=='style':
                            bookStyle = value
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                    if bookStyle != 'id':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( bookStyle, sublocation ) )
                    idLine = idField
                    if element.text and element.text.strip(): idLine += ' ' + element.text
                    self.addLine( 'id', idLine )
                elif element.tag == 'chapter': # milestone (not a container)
                    v = '0'
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation )
                    # Process the attributes
                    chapterStyle = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            c = value
                        elif attrib=='style':
                            chapterStyle = value
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                    if chapterStyle != 'c':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( chapterStyle, sublocation ) )
                    self.addLine( 'c', c )
                elif element.tag == 'para':
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation )
                    USFMMarker = element.attrib['style'] # Get the USFM code for the paragraph style
                    if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( USFMMarker ):
                        #if lastMarker: self.addLine( lastMarker, lastText )
                        #lastMarker, lastText = USFMMarker, text
                        loadParagraph( element, sublocation )
                    elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( USFMMarker ): # the line begins with an internal USFM Marker -- append it to the previous line
                        text = element.text
                        if text is None: text = ''
                        if BibleOrgSysGlobals.debugFlag:
                            print( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                            #halt # Not checked yet
                        if text:
                            loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                            logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, c, v, text ) )
                        else: # no text
                            loadErrors.append( _("{} {}:{} Found '\\{}' internal USFM Marker at beginning of line (with no text)").format( self.BBB, c, v, USFMMarker ) )
                            logging.warning( _("Found '\\{}' internal USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, c, v ) )
                        self.addPriorityError( 97, c, v, _("Found \\{} internal USFM Marker on new line in file").format( USFMMarker ) )
                        #lastText += '' if lastText.endswith(' ') else ' ' # Not always good to add a space, but it's their fault!
                        # NOTE(review): lastText is only ever written out by the unknown-marker branch below
                        #   (and only if lastMarker is set), so this text is not actually appended to the previous line -- confirm intent
                        lastText =  '\\' + USFMMarker + ' ' + text
                        #print( "{} {} {} Now have {}:{!r}".format( self.BBB, c, v, lastMarker, lastText ) )
                    else: # the line begins with an unknown USFM Marker
                        text = element.text
                        if text:
                            loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line with text: {}").format( self.BBB, c, v, USFMMarker, text ) )
                            logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line with text: {}").format( USFMMarker, self.BBB, c, v, text ) )
                        else: # no text
                            loadErrors.append( _("{} {}:{} Found '\\{}' unknown USFM Marker at beginning of line (with no text").format( self.BBB, c, v, USFMMarker ) )
                            logging.error( _("Found '\\{}' unknown USFM Marker after {} {}:{} at beginning of line (with no text)").format( USFMMarker, self.BBB, c, v ) )
                        self.addPriorityError( 100, c, v, _("Found \\{} unknown USFM Marker on new line in file").format( USFMMarker ) )
                        for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                            if USFMMarker.startswith( tryMarker ): # Let's try changing it
                                if lastMarker: self.addLine( lastMarker, lastText )
                                # text can be None here (empty para) so guard the concatenation
                                # NOTE(review): this buffered line is only written when a later unknown marker is met
                                #   (nothing flushes it at the end of the file) -- confirm intent
                                lastMarker, lastText = tryMarker, USFMMarker[len(tryMarker):] + ' ' + ( text or '' )
                                loadErrors.append( _("{} {}:{} Changed '\\{}' unknown USFM Marker to {!r} at beginning of line: {}").format( self.BBB, c, v, USFMMarker, tryMarker, text ) )
                                logging.warning( _("Changed '\\{}' unknown USFM Marker to {!r} after {} {}:{} at beginning of line: {}").format( USFMMarker, tryMarker, self.BBB, c, v, text ) )
                                break
                        # Otherwise, don't bother processing this line -- it'll just cause more problems later on
                else:
                    logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, sublocation ) )
                    self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
        else: # Don't silently load nothing if this isn't a USX file at all
            logging.error( _("Unexpected {!r} root element in {} -- nothing loaded").format( self.tree.tag, self.sourceFilepath ) )

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
    def __validateSystem(self, systemName):
        """
        Checks for basic formatting/content errors in a Bible book name system.

        systemName is the key into self.__XMLSystems; that entry must already
            hold a (non-empty) 'tree' and a 'languageCode'.

        Every top-level element of the tree is checked against the
            compulsory/optional/unique attribute and element lists held on
            self (each indexed by the position of the element's tag in
            self.mainElementTags).

        Nothing is returned -- every problem found is reported through the
            logging module. A missing compulsory item is reported once as
            "missing" (it is not additionally reported as "blank").
        """
        assert systemName
        systemData = self.__XMLSystems[systemName]
        assert systemData['tree']

        if len(systemData["languageCode"]) != 3:
            logging.error(
                _("Couldn't find 3-letter language code in {!r} book names system"
                  ).format(systemName))
        #if self.__ISOLanguages and not self.__ISOLanguages.isValidLanguageCode( self.__XMLSystems[systemName]["languageCode"] ): # Check that we have a valid language code
        #logging.error( _("Unrecognized {!r} ISO-639-3 language code in {!r} book names system").format( self.__XMLSystems[systemName]["languageCode"], systemName ) )

        # One list of already-seen values for each field that must be unique,
        #   keyed by kind, main-element index and field name
        uniqueDict = {}
        for index in range(len(self.mainElementTags)):
            for elementName in self.uniqueElements[index]:
                uniqueDict["Element_{}_{}".format(index, elementName)] = []
            for attributeName in self.uniqueAttributes[index]:
                uniqueDict["Attribute_{}_{}".format(index, attributeName)] = []

        for k, element in enumerate(systemData['tree']):
            if element.tag not in self.mainElementTags:
                logging.warning(
                    _("Unexpected element: {} in record {} in {}").format(
                        element.tag, k, systemName))
                continue

            BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            index = self.mainElementTags.index(element.tag)
            compulsoryAttributes = self.compulsoryAttributes[index]
            optionalAttributes = self.optionalAttributes[index]
            compulsoryElements = self.compulsoryElements[index]
            optionalElements = self.optionalElements[index]

            # Check compulsory attributes on this main element
            for attributeName in compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {} in {}"
                          ).format(attributeName, element.tag, k, systemName))
                elif not attributeValue:  # present but empty
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {} in {}"
                          ).format(attributeName, element.tag, k, systemName))

            # Check optional attributes on this main element
            for attributeName in optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {} in {}"
                          ).format(attributeName, element.tag, k, systemName))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in compulsoryAttributes and attributeName not in optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}"
                          ).format(attributeName, attributeValue, element.tag,
                                   k, systemName))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes[index]:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    seenValues = uniqueDict["Attribute_{}_{}".format(index, attributeName)]
                    if attributeValue in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {} in {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, k, systemName))
                    seenValues.append(attributeValue)

            # Check compulsory elements
            for elementName in compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing (record {}) in {}"
                          ).format(elementName, k, systemName))
                elif not foundElement.text:  # only look at .text when the element really exists
                    logging.warning(
                        _("Compulsory {!r} element is blank (record {}) in {}"
                          ).format(elementName, k, systemName))

            # Check optional elements
            for elementName in optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None and not foundElement.text:
                    logging.warning(
                        _("Optional {!r} element is blank (record {}) in {}"
                          ).format(elementName, k, systemName))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in compulsoryElements and subelement.tag not in optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found (record {}) in {} {}"
                          ).format(subelement.tag, subelement.text, k,
                                   systemName, element.tag))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements[index]:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    seenValues = uniqueDict["Element_{}_{}".format(index, elementName)]
                    if text in seenValues:
                        # Repeats are only reported at info level for BibleDivisionNames
                        myLogging = logging.info if element.tag == 'BibleDivisionNames' else logging.error
                        myLogging(
                            _("Found {!r} data repeated in {!r} element (record {}) in {}"
                              ).format(text, elementName, k, systemName))
                    seenValues.append(text)
Ejemplo n.º 35
0
        print( gsc ) # Just print a summary

        if BibleOrgSysGlobals.commandLineArguments.export:
            print( "Exports aren't written yet!" )
            #hlc.exportDataToPython() # Produce the .py tables
            #hlc.exportDataToC() # Produce the .h tables
            halt


    if 1: # demonstrate the Greek Lexicon class
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "\nDemonstrating the Greek Lexicon class…" )
        hl = GreekLexicon( testFolder ) # Load and process the XML
        print( hl ) # Just print a summary
        print()
        for strongsKey in ('G1','G123','G165','G1732','G1979','G2011','G5624','G5625',): # Last one is invalid
            print( '\n' + strongsKey )
            print( " Data:", hl.getStrongsEntryData( strongsKey ) )
            print( " Pronunciation:", hl.getStrongsEntryField( strongsKey, 'pronunciation' ) )
            print( " HTML:", hl.getStrongsEntryHTML( strongsKey ) )
# end of demo

if __name__ == '__main__':
    # Script entry point: only runs when this file is executed directly (not when it's imported)

    # Configure basic set-up
    parser = BibleOrgSysGlobals.setup( ProgName, ProgVersion )
    # exportAvailable=True is requested here; demo() checks BibleOrgSysGlobals.commandLineArguments.export
    #   (presumably the option that this flag enables -- TODO confirm in BibleOrgSysGlobals)
    BibleOrgSysGlobals.addStandardOptionsAndProcess( parser, exportAvailable=True )

    # Run the demonstration code defined above
    demo()

    # Matching shutdown call for the setup() call above
    BibleOrgSysGlobals.closedown( ProgName, ProgVersion )
# end of GreekLexicon.py
Ejemplo n.º 36
0
def _listOpenSongCandidates(folderPath):
    """
    List the entries directly inside folderPath.

    Returns a 2-tuple (folderNames, fileNames) of bare names (not paths).
    Files are left out if their upper-cased name ends with one of
    filenameEndingsToIgnore or if their upper-cased extension (without the
    dot) is in extensionsToIgnore.
    """
    folderNames, fileNames = [], []
    for something in os.listdir(folderPath):
        somepath = os.path.join(folderPath, something)
        if os.path.isdir(somepath):
            folderNames.append(something)
        elif os.path.isfile(somepath):
            somethingUpper = something.upper()
            if any(somethingUpper.endswith(ending)
                   for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                fileNames.append(something)
    return folderNames, fileNames


def _looksLikeOpenSongXML(filename, folderName):
    """
    Peek at the first two lines of the given file and return True if they
    look like an OpenSong XML Bible, i.e., an XML 1.0 declaration (with or
    without a leading BOM) followed by a line starting with <bible>.
    """
    xmlDeclarations = (
        '<?xml version="1.0"',
        "<?xml version='1.0'",
        '\ufeff<?xml version="1.0"',  # same but with BOM
        "\ufeff<?xml version='1.0'",
    )
    firstLines = BibleOrgSysGlobals.peekIntoFile(filename,
                                                 folderName,
                                                 numLines=2)
    if not firstLines or len(firstLines) < 2:
        return False
    if not firstLines[0].startswith(xmlDeclarations):
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("OSB (unexpected) first line was {!r} in {}".format(
                firstLines, filename))
        return False
    return firstLines[1].startswith('<bible>')


def OpenSongXMLBibleFileCheck(givenFolderName,
                              strictCheck=True,
                              autoLoad=False,
                              autoLoadBooks=False):
    """
    Given a folder, search for OpenSong XML Bible files or folders in the folder and in the next level down.

    If strictCheck is true (or the global strictCheckingFlag is set), a file
        only counts if its first two lines look like an OpenSong XML Bible.
        Otherwise every file that isn't on the ignore lists is counted.

    Returns False if an error is found (the given folder is unreadable or
        is not a folder).

    if autoLoad and autoLoadBooks are both false (default)
        returns None, or the number found.

    if autoLoad or autoLoadBooks is true and exactly one OpenSong Bible is found,
        returns the OpenSongXMLBible object
        (its load() is only called if autoLoadBooks is true).

    Subfolders that can't be read are skipped with a warning.
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("OpenSongXMLBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)
        assert autoLoadBooks in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("OpenSongXMLBibleFileCheck: Given {!r} folder is unreadable").
            format(givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("OpenSongXMLBibleFileCheck: Given {!r} path is not a folder").
            format(givenFolderName))
        return False

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(
            " OpenSongXMLBibleFileCheck: Looking for files in given {}".format(
                givenFolderName))
    foundFolders, foundFiles = _listOpenSongCandidates(givenFolderName)
    foundFolders = [name for name in foundFolders
                    if name != '__MACOSX']  # don't visit these directories
    #print( 'osx1', foundFiles )

    mustPeek = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    # See if there's an OpenSong project here in this folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if mustPeek and not _looksLikeOpenSongXML(thisFilename,
                                                  givenFolderName):
            continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("OpenSongXMLBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            osb = OpenSongXMLBible(givenFolderName, lastFilenameFound)
            if autoLoadBooks: osb.load()  # Load and process the file
            return osb
        return numFound

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName, os.R_OK):
            # Skip it rather than letting os.listdir() raise below
            logging.warning(
                _("OpenSongXMLBibleFileCheck: {!r} subfolder is unreadable").
                format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    OpenSongXMLBibleFileCheck: Looking for files in {}".
                  format(tryFolderName))
        foundSubfolders, foundSubfiles = _listOpenSongCandidates(tryFolderName)
        #print( 'osx2', foundSubfiles )

        # See if there's an OS project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if mustPeek and not _looksLikeOpenSongXML(thisFilename,
                                                      tryFolderName):
                continue
            foundProjects.append((
                tryFolderName,
                thisFilename,
            ))
    numFound = len(foundProjects)
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("OpenSongXMLBibleFileCheck foundProjects", numFound,
                  foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            osb = OpenSongXMLBible(foundProjects[0][0],
                                   foundProjects[0][1])  # Folder and filename
            if autoLoadBooks: osb.load()  # Load and process the file
            return osb
        return numFound
    return None  # Nothing was found in either level
Ejemplo n.º 37
0
def createOpenSongXML(BibleObject,
                      outputFolder=None,
                      controlDict=None,
                      validationSchema=None):
    """
    Using settings from the given control file,
        converts the USFM information to a UTF-8 OpenSong XML file
        and then also writes a zipped copy of that file next to it.

    This format is roughly documented at http://de.wikipedia.org/wiki/OpenSong_XML
        but more fields can be discovered by looking at downloaded files.

    Parameters:
        BibleObject: the Bible to export -- every entry of its .books mapping is written.
        outputFolder: folder given to MLWriter and joined with the filename for zipping.
            NOTE(review): os.path.join() in the zipping step does not accept None,
                so callers presumably always supply a folder -- confirm.
        controlDict: optional dict of settings. Only 'PublicationCode' and
            'OpenSongOutputFilename' are read here. None is treated as an empty dict.
        validationSchema: if truthy, the XML is validated against it.

    Returns the result of xw.validate() if a validationSchema was given,
        otherwise True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print("Running createOpenSongXML…")
    if BibleOrgSysGlobals.debugFlag: assert BibleObject.books

    # The signature allows controlDict to be omitted, so don't crash on the lookups below
    if controlDict is None:
        controlDict = {}

    ignoredMarkers, unhandledMarkers, unhandledBooks = set(), set(), []

    def writeOpenSongBook(writerObject, BBB, bkData):
        """
        Writes a book to the OpenSong XML writerObject.

        Verse text is collected in an accumulator and only written out (as a v element)
            when the next verse or chapter starts, or at the end of the book.
        Books without an OSIS abbreviation are skipped and remembered in unhandledBooks.
        """
        OSISAbbrev = BibleOrgSysGlobals.BibleBooksCodes.getOSISAbbreviation(
            BBB)
        if not OSISAbbrev:
            logging.warning(
                "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
                .format(BBB))
            unhandledBooks.append(BBB)
            return
        writerObject.writeLineOpen('b', ('n', bkData.getAssumedBookNames()[0]))
        haveOpenChapter, startedFlag, gotVP, accumulator = False, False, None, ""
        C, V = '-1', '-1'  # So first/id line starts at -1:0
        # NOTE(review): verseNumberString is first assigned in the 'v' branch below.
        #   If the first v field of a book has no text but verse text follows,
        #   the flush of the accumulator would hit an unbound name -- confirm whether that can occur.
        for processedBibleEntry in bkData._processedLines:  # Process internal Bible data lines
            marker = processedBibleEntry.getMarker()
            text = processedBibleEntry.getCleanText()
            extras = processedBibleEntry.getExtras()
            if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                continue  # Just ignore added markers -- not needed here
            if marker in USFM_PRECHAPTER_MARKERS:
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                    assert C == '-1' or marker == 'rem' or marker.startswith(
                        'mte')
                V = str(int(V) + 1)  # Pre-chapter lines are counted as pseudo-verses

            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in (
                    'ie', ):  # Just ignore these lines
                ignoredMarkers.add(marker)
            elif marker == 'c':
                if accumulator:  # Flush the last verse of the previous chapter
                    writerObject.writeLineOpenClose('v', accumulator,
                                                    ('n', verseNumberString))
                    accumulator = ''
                if haveOpenChapter:
                    writerObject.writeLineClose('c')
                C, V = text, '0'
                writerObject.writeLineOpen('c', ('n', text))
                haveOpenChapter = True
            elif marker in (
                    'c#',
            ):  # These are the markers that we can safely ignore for this export
                ignoredMarkers.add(marker)
            elif marker == 'vp#':  # This precedes a v field and has the verse number to be printed
                gotVP = text  # Just remember it for now
            elif marker == 'v':
                V = text
                if gotVP:  # this is the verse number to be published
                    text = gotVP
                    gotVP = None
                startedFlag = True
                if accumulator:  # Flush the previous verse
                    writerObject.writeLineOpenClose('v', accumulator,
                                                    ('n', verseNumberString))
                    accumulator = ''
                if not text:
                    logging.warning("createOpenSongXML: Missing text for v")
                    continue
                # Used below but remove anything that'll cause a big XML problem later
                verseNumberString = text.replace('<', '').replace(
                    '>', '').replace('"', '')

            elif marker in ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4', 'ms1','ms2','ms3','ms4', ) \
            or marker in USFM_ALL_INTRODUCTION_MARKERS \
            or marker in ('s1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd', 'cl','lit', ):
                ignoredMarkers.add(marker)
            elif marker in USFM_BIBLE_PARAGRAPH_MARKERS:
                if BibleOrgSysGlobals.debugFlag: assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'b',
                    'nb',
                    'ib',
            ):
                if BibleOrgSysGlobals.debugFlag: assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'v~',
                    'p~',
            ):
                if BibleOrgSysGlobals.debugFlag: assert text or extras
                if not text:  # this is an empty (untranslated) verse
                    text = '- - -'  # but we'll put in a filler
                if startedFlag:  # Text before the first v field is dropped
                    accumulator += (' ' if accumulator else
                                    '') + BibleOrgSysGlobals.makeSafeXML(text)
            else:
                if text:
                    logging.warning(
                        "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                        format(marker, BBB, C, V, text))
                if extras:
                    logging.warning(
                        "toOpenSong: lost extras in {} field in {} {}:{}".
                        format(marker, BBB, C, V))
                unhandledMarkers.add(marker)
            if extras and marker not in (
                    'v~',
                    'p~',
            ) and marker not in ignoredMarkers:
                logging.critical(
                    "toOpenSong: extras not handled for {} at {} {}:{}".format(
                        marker, BBB, C, V))
        if accumulator:  # Flush the very last verse of the book
            writerObject.writeLineOpenClose('v', accumulator,
                                            ('n', verseNumberString))
        if haveOpenChapter:
            writerObject.writeLineClose('c')
        writerObject.writeLineClose('b')

    # end of createOpenSongXML.writeOpenSongBook

    # Set-up our Bible reference system
    #   (BOS and BRL are not used further in this function,
    #       but the lookups are kept so that a bad PublicationCode is still detected here)
    if 'PublicationCode' not in controlDict or controlDict[
            'PublicationCode'] == 'GENERIC':
        BOS = BibleObject.genericBOS
        BRL = BibleObject.genericBRL
    else:
        BOS = BibleOrganisationalSystem(controlDict['PublicationCode'])
        BRL = BibleReferenceList(BOS, BibleObject=None)

    if BibleOrgSysGlobals.verbosityLevel > 2:
        print(_("  Exporting to OpenSong format…"))
    try:
        osOFn = controlDict['OpenSongOutputFilename']
    except KeyError:
        osOFn = 'Bible.osong'
    filename = BibleOrgSysGlobals.makeSafeFilename(osOFn)
    xw = MLWriter(filename, outputFolder)
    xw.setHumanReadable()
    xw.start()
    xw.writeLineOpen('Bible')
    for BBB, bookData in BibleObject.books.items():
        writeOpenSongBook(xw, BBB, bookData)
    xw.writeLineClose('Bible')
    xw.close()

    # Report what we skipped or couldn't handle
    if ignoredMarkers:
        logging.info("createOpenSongXML: Ignored markers were {}".format(
            ignoredMarkers))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + _("WARNING: Ignored createOpenSongXML markers were {}"
                           ).format(ignoredMarkers))
    if unhandledMarkers:
        logging.warning("createOpenSongXML: Unhandled markers were {}".format(
            unhandledMarkers))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  " +
                  _("WARNING: Unhandled toOpenSong markers were {}").format(
                      unhandledMarkers))
    if unhandledBooks:
        logging.warning("createOpenSongXML: Unhandled books were {}".format(
            unhandledBooks))
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  " + _("WARNING: Unhandled createOpenSongXML books were {}"
                           ).format(unhandledBooks))

    # Now create a zipped version
    filepath = os.path.join(outputFolder, filename)
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("  Zipping {} OpenSong file…".format(filename))
    # The with-statement closes the archive even if the write fails
    with zipfile.ZipFile(filepath + '.zip',
                         'w',
                         compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(filepath, filename)

    if validationSchema: return xw.validate(validationSchema)
    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print("  createOpenSongXML finished successfully.")
    return True
Ejemplo n.º 38
0
def CSVBibleFileCheck(givenFolderName,
                      strictCheck=True,
                      autoLoad=False,
                      autoLoadBooks=False):
    """
    Given a folder, search for CSV Bible files or folders in the folder and in the next level down.

    Only files ending in '.txt' are considered. If strictCheck (or the global
        strictCheckingFlag) is set, the first line of each candidate must also
        start like a CSV Bible (a Book/Chapter/Verse header or a 1,1,1 first row,
        with or without quotes). The same test is used at both folder levels.

    Returns False if an error is found.

    if autoLoad and autoLoadBooks are false (default)
        returns None, or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one CSV Bible is found,
        returns the CSVBible object (loaded only if autoLoadBooks is true).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("CSVBibleFileCheck( {}, {}, {} )".format(givenFolderName,
                                                       strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert (givenFolderName and isinstance(givenFolderName, str))
    if BibleOrgSysGlobals.debugFlag:
        assert (autoLoad in (
            True,
            False,
        ) and autoLoadBooks in (
            True,
            False,
        ))

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("CSVBibleFileCheck: Given {} folder is unreadable").format(
                repr(givenFolderName)))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("CSVBibleFileCheck: Given {} path is not a folder").format(
                repr(givenFolderName)))
        return False

    # What the first line of a CSV Bible file may start with
    #   (used for the strict check in this folder AND one level down)
    expectedFirstLineStarts = (
        '"Book","Chapter","Verse",',
        '"1","1","1",',
        'Book,Chapter,Verse,',
        '1,1,1,',
    )
    doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" CSVBibleFileCheck: Looking for files in given {}".format(
            repr(givenFolderName)))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath): foundFolders.append(something)
        elif os.path.isfile(somepath):
            somethingUpper = something.upper()
            if any(
                    somethingUpper.endswith(ending)
                    for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[
                    1:] not in extensionsToIgnore:  # Compare without the first dot
                foundFiles.append(something)
    if '__MACOSX' in foundFolders:
        foundFolders.remove('__MACOSX')  # don't visit these directories

    # See if there's an CSV Bible here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename in ('book_names.txt', 'Readme.txt'):
            looksHopeful = True  # Companion files -- not Bible text themselves
        elif thisFilename.endswith('.txt'):
            if doStrictCheck:
                firstLine = BibleOrgSysGlobals.peekIntoFile(
                    thisFilename, givenFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if not firstLine.startswith(expectedFirstLineStarts):
                    if BibleOrgSysGlobals.verbosityLevel > 2:
                        print(
                            "CSVBibleFileCheck: (unexpected) first line was {!r} in {}"
                            .format(firstLine, thisFilename))
                    continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("CSVBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = CSVBible(givenFolderName, lastFilenameFound[:-4]
                          )  # Remove the end of the actual filename ".txt"
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print("    Looked hopeful but no actual files found")

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("CSVBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    CSVBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        foundSubfiles = []
        for something in os.listdir(tryFolderName):
            somepath = os.path.join(givenFolderName, thisFolderName, something)
            if not os.path.isfile(somepath):
                continue  # We don't descend any further, so sub-subfolders are ignored
            somethingUpper = something.upper()
            if any(
                    somethingUpper.endswith(ending)
                    for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[
                    1:] not in extensionsToIgnore:  # Compare without the first dot
                foundSubfiles.append(something)

        # See if there's an CSV Bible here in this folder
        for thisFilename in sorted(foundSubfiles):
            if thisFilename.endswith('.txt'):
                if doStrictCheck:
                    firstLine = BibleOrgSysGlobals.peekIntoFile(
                        thisFilename, tryFolderName)
                    if firstLine is None:
                        continue  # seems we couldn't decode the file
                    # Same signature test as for the given folder above
                    if not firstLine.startswith(expectedFirstLineStarts):
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print(
                                "CSVBibleFileCheck: (unexpected) first line was {!r} in {}"
                                .format(firstLine, thisFilename))
                        if debuggingThisModule: halt  # Deliberate stop (undefined name) when debugging this module
                        continue
                foundProjects.append((
                    tryFolderName,
                    thisFilename,
                ))
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("CSVBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert (len(foundProjects) == 1)
            uB = CSVBible(
                foundProjects[0][0], foundProjects[0][1]
                [:-4])  # Remove the end of the actual filename ".txt"
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
Ejemplo n.º 39
0
def HaggaiXMLBibleFileCheck( givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False ):
    """
    Given a folder, search for Haggai XML Bible files or folders in the folder and in the next level down.

    If strictCheck (or the global strictCheckingFlag) is set, a file only counts
        if its first line is an XML 1.0 declaration (with or without a BOM)
        and its second line contains 'haggai_'.
    Unreadable subfolders are logged and skipped.

    Returns False if an error is found.

    if autoLoad and autoLoadBooks are false (default)
        returns None, or the number found.

    if autoLoad or autoLoadBooks is true and exactly one Haggai Bible is found,
        returns the HaggaiXMLBible object (loaded only if autoLoadBooks is true).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2: print( "HaggaiXMLBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag: assert givenFolderName and isinstance( givenFolderName, str )
    if BibleOrgSysGlobals.debugFlag: assert autoLoad in (True,False,)

    # Check that the given folder is readable
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("HaggaiXMLBibleFileCheck: Given {!r} folder is unreadable").format( givenFolderName ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("HaggaiXMLBibleFileCheck: Given {!r} path is not a folder").format( givenFolderName ) )
        return False

    # Acceptable beginnings of the first line (double or single quotes, each with or without a BOM)
    xmlDeclarationStarts = ( '<?xml version="1.0"', "<?xml version='1.0'",
                             '\ufeff<?xml version="1.0"', "\ufeff<?xml version='1.0'", )
    doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3: print( " HaggaiXMLBibleFileCheck: Looking for files in given {}".format( givenFolderName ) )
    foundFolders, foundFiles = [], []
    for something in os.listdir( givenFolderName ):
        somepath = os.path.join( givenFolderName, something )
        if os.path.isdir( somepath ):
            if something == '__MACOSX': continue # don't visit these directories
            foundFolders.append( something )
        elif os.path.isfile( somepath ):
            somethingUpper = something.upper()
            if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ): continue
            somethingUpperExt = os.path.splitext( somethingUpper )[1]
            if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                foundFiles.append( something )

    # See if there's an Haggai project here in this folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted( foundFiles ):
        if doStrictCheck:
            firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, givenFolderName, numLines=2 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( xmlDeclarationStarts ):
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "HB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                continue
            if 'haggai_' not in firstLines[1]: continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "HaggaiXMLBibleFileCheck got", numFound, givenFolderName, lastFilenameFound )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            ub = HaggaiXMLBible( givenFolderName, lastFilenameFound )
            if autoLoadBooks: ub.load() # Load and process the file
            return ub
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            # Skip it rather than letting os.listdir() abort the whole search
            logging.warning( _("HaggaiXMLBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "    HaggaiXMLBibleFileCheck: Looking for files in {}".format( tryFolderName ) )
        foundSubfiles = []
        for something in os.listdir( tryFolderName ):
            somepath = os.path.join( givenFolderName, thisFolderName, something )
            if not os.path.isfile( somepath ): continue # We don't descend any further
            somethingUpper = something.upper()
            if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ): continue
            somethingUpperExt = os.path.splitext( somethingUpper )[1]
            if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                foundSubfiles.append( something )

        # See if there's an Haggai project here in this folder
        for thisFilename in sorted( foundSubfiles ):
            if doStrictCheck:
                firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, tryFolderName, numLines=2 )
                if not firstLines or len(firstLines)<2: continue
                if not firstLines[0].startswith( xmlDeclarationStarts ):
                    if BibleOrgSysGlobals.verbosityLevel > 2: print( "HB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                    continue
                if 'haggai_' not in firstLines[1]: continue
            foundProjects.append( (tryFolderName, thisFilename,) )
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "HaggaiXMLBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            ub = HaggaiXMLBible( foundProjects[0][0], foundProjects[0][1] ) # Folder and filename
            if autoLoadBooks: ub.load() # Load and process the file
            return ub
        return numFound
Ejemplo n.º 40
0
        assert mark not in otherMarks
        assert mark not in cantillationMarks
    for j, mark in enumerate(otherMarks):
        #print( j, mark )
        assert otherMarks.count(mark) == 1
        assert mark not in consonants
        assert mark not in vowelPoints
        assert mark not in cantillationMarks
    for j, mark in enumerate(cantillationMarks):
        #print( j, mark )
        assert cantillationMarks.count(mark) == 1
        assert mark not in consonants
        assert mark not in vowelPoints
        assert mark not in otherMarks

    BibleOrgSysGlobals.printUnicodeInfo(vowelPoints, "Vowel points")
    BibleOrgSysGlobals.printUnicodeInfo(cantillationMarks,
                                        "Cantillation marks")


class Hebrew():
    """
    Holds a string of Hebrew text.
    """
    def __init__(self, text):
        """ Store the given text as both originalText and currentText. """
        self.originalText = self.currentText = text

    # end of Hebrew.__init__
Ejemplo n.º 41
0
        def loadParagraph( paragraphXML, paragraphlocation ):
            """
            Load a paragraph from the USX XML.
            In this context, paragraph means heading and intro lines,
                as well as paragraphs of verses.

            Uses (and updates) C,V information from the containing function.
            """
            nonlocal C, V

            # Process the attributes first
            paragraphStyle = None
            for attrib,value in paragraphXML.items():
                if attrib=='style':
                    paragraphStyle = value # This is basically the USFM marker name
                else:
                    logging.warning( _("CH46 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )

            # Now process the paragraph text (or write a paragraph marker anyway)
            paragraphText = paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else ''
            if version is None: paragraphText = paragraphText.rstrip() # Don't need to strip extra spaces in v2
            self.addLine( paragraphStyle, paragraphText )

            # Now process the paragraph subelements
            for element in paragraphXML:
                location = element.tag + ' ' + paragraphlocation
                #print( "USXXMLBibleBook.load", C, V, element.tag, location )
                if element.tag == 'verse': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    verseStyle = altNumber = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            V = value
                        elif attrib=='style':
                            verseStyle = value
                        elif attrib=='altnumber':
                            altNumber = value
                        else:
                            logging.error( _("KR60 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if verseStyle != 'v':
                        logging.error( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                    #if altNumber: print( repr(verseStyle), repr(altNumber) ); halt
                    altStuff = ' \\va {}\\va*'.format( altNumber ) if altNumber else ''
                    self.addLine( verseStyle, V + altStuff + ' ' )
                    # Now process the tail (if there's one) which is the verse text
                    if element.tail:
                        vText = element.tail
                        if vText[0]=='\n': vText = vText.lstrip() # Paratext puts cross references on a new line
                        if vText:
                            #print( repr(vText) )
                            self.appendToLastLine( vText )
                elif element.tag == 'char':
                    # Process the attributes first
                    charStyle = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            charStyle = value # This is basically the USFM character marker name
                            #print( "  charStyle", charStyle )
                            assert not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle )
                        else:
                            logging.error( _("QU52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    charLine = "\\{} {} ".format( charStyle, element.text )
                    # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( '{} {}:{} {}'.format( self.BBB, C, V, element.tag ) )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            subCharStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style': subCharStyle = value
                                elif attrib=='closed':
                                    assert value=='false'
                                    charClosed = False
                                else:
                                    logging.error( _("KS41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            charLine += "\\{} {}".format( subCharStyle, subelement.text )
                            if charClosed: charLine += "\\{}*".format( subCharStyle )
                            #if subelement.tail is not None: print( "  tail1", repr(subelement.tail) )
                            charLine += '' if subelement.tail is None else subelement.tail
                        else:
                            logging.error( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                            self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                    # A character field must be added to the previous field
                    #if element.tail is not None: print( " tail2", repr(element.tail) )
                    charTail = ''
                    if element.tail:
                        charTail = element.tail
                        if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts footnote parts on new lines
                    charLine += "\\{}*{}".format( charStyle, charTail )
                    #if debuggingThisModule: print( "USX.loadParagraph:", C, V, paragraphStyle, charStyle, repr(charLine) )
                    self.appendToLastLine( charLine )
                elif element.tag == 'note':
                    #print( "NOTE", BibleOrgSysGlobals.elementStr( element ) )
                    # Process the attributes first
                    noteStyle = noteCaller = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            noteStyle = value # This is basically the USFM marker name
                            assert noteStyle in ('x','f',)
                        elif attrib=='caller': noteCaller = value # Usually hyphen or a symbol to be used for the note
                        else:
                            logging.error( _("CY38 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if noteCaller=='' and self.BBB=='NUM' and C=='10' and V=='36': noteCaller = '+' # Hack
                    assert noteStyle and noteCaller # both compulsory
                    noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                    if element.text:
                        noteText = element.text.strip()
                        noteLine += noteText
                    # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( C, V, subelement.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            # Process the attributes first
                            charStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style':
                                    charStyle = value
                                elif attrib=='closed':
                                    assert value=='false'
                                    charClosed = False
                                else:
                                    logging.warning( _("GJ67 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            noteLine += "\\{} {}".format( charStyle, subelement.text )
                            # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                            for sub2element in subelement:
                                sub2location = sub2element.tag + ' ' + sublocation
                                #print( C, V, sub2element.tag )
                                if sub2element.tag == 'char': # milestone (not a container)
                                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location )
                                    # Process the attributes first
                                    char2Style, char2Closed = None, True
                                    for attrib,value in sub2element.items():
                                        if attrib=='style':
                                            char2Style = value
                                        elif attrib=='closed':
                                            assert value=='false'
                                            char2Closed = False
                                        else:
                                            logging.warning( _("VH36 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                                    assert char2Closed
                                    noteLine += "\\{} {}\\{}*{}".format( char2Style, sub2element.text, char2Style, sub2element.tail if sub2element.tail else '' )
                            if charClosed: noteLine += "\\{}*".format( charStyle )
                            if subelement.tail:
                                charTail = subelement.tail
                                if charTail[0]=='\n': charTail = charTail.lstrip() # Paratext puts cross reference parts on a new line
                                noteLine += charTail
                        elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                            BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            unmmatchedMarker = None
                            for attrib,value in subelement.items():
                                if attrib=='marker':
                                    unmmatchedMarker = value
                                else:
                                    logging.warning( _("NV21 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            self.addPriorityError( 2, C, V, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, C, V, sublocation ) )
                            self.addPriorityError( 1, C, V, _("Unprocessed {} subelement").format( subelement.tag ) )
                        if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                    #noteLine += "\\{}*".format( charStyle )
                    noteLine += "\\{}*".format( noteStyle )
                    if element.tail:
                        #if '\n' in element.tail: halt
                        noteTail = element.tail
                        if noteTail[0]=='\n': noteTail = noteTail.lstrip() # Paratext puts multiple cross-references on new lines
                        noteLine += noteTail
                    #print( "NoteLine", repr(noteLine) )
                    self.appendToLastLine( noteLine )
                elif element.tag == 'link': # Used to include extra resources
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    linkStyle = linkDisplay = linkTarget = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            linkStyle = value
                            assert linkStyle in ('jmp',)
                        elif attrib=='display':
                            linkDisplay = value # e.g., "click here"
                        elif attrib=='target':
                            linkTarget = value # e.g., some reference
                        else:
                            logging.warning( _("KW54 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    self.addPriorityError( 3, C, V, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )
                elif element.tag == 'unmatched': # Used to denote errors in the source text
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    self.addPriorityError( 2, C, V, _("Unmatched element in {}").format( location) )
                else:
                    logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, C, V, location ) )
                    self.addPriorityError( 1, C, V, _("Unprocessed {} element").format( element.tag ) )
                    for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] )
                    if BibleOrgSysGlobals.debugFlag: halt
def ForgeForSwordSearcherBibleFileCheck(givenFolderName,
                                        strictCheck=True,
                                        autoLoad=False,
                                        autoLoadBooks=False):
    """
    Look for ForgeForSwordSearcher Bible text files in the given folder
        and, if none are found there, in each of its immediate subfolders.

    A candidate is a file whose name ends with '.txt'
        (in the given folder itself, 'book_names.txt' and 'Readme.txt' are not counted).
    If strictCheck or BibleOrgSysGlobals.strictCheckingFlag is set,
        the first line of the file (after any leading BOM)
        must also start with '; TITLE:' followed by a whitespace character.

    Returns False if the given folder is unreadable or is not a folder.

    Returns None if no candidate is found at either level.

    Otherwise returns the number of candidates found,
        except that when exactly one is found and autoLoad or autoLoadBooks is set,
        a ForgeForSwordSearcherBible object is returned instead
        (its load() is only called if autoLoadBooks is set).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("ForgeForSwordSearcherBibleFileCheck( {}, {}, {}, {} )".format(givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Refuse anything that we can't read or that isn't a folder
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(_("ForgeForSwordSearcherBibleFileCheck: Given {} folder is unreadable").format(repr(givenFolderName)))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(_("ForgeForSwordSearcherBibleFileCheck: Given {} path is not a folder").format(repr(givenFolderName)))
        return False

    # Sort the contents of the given folder into subfolders and (non-ignored) files
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" ForgeForSwordSearcherBibleFileCheck: Looking for files in given {}".format(repr(givenFolderName)))
    topFolders, topFiles = [], []
    for entryName in os.listdir(givenFolderName):
        entryPath = os.path.join(givenFolderName, entryName)
        if os.path.isdir(entryPath):
            if entryName != '__MACOSX':  # don't visit these directories
                topFolders.append(entryName)
            continue
        if not os.path.isfile(entryPath):
            continue
        upperName = entryName.upper()
        if any(upperName.endswith(ending) for ending in filenameEndingsToIgnore):
            continue
        upperExtension = os.path.splitext(upperName)[1]
        if upperExtension[1:] not in extensionsToIgnore:  # Compare without the first dot
            topFiles.append(entryName)

    # First try: is there a project directly in the given folder?
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(topFiles):
        if thisFilename in ('book_names.txt', 'Readme.txt'):
            looksHopeful = True
            continue
        if not thisFilename.endswith('.txt'):
            continue
        if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
            firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, givenFolderName)
            if firstLine is None:  # seems we couldn't decode the file
                continue
            if firstLine[:1] == chr(65279):  # U+FEFF or \ufeff
                logging.info("ForgeForSwordSearcherBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format(thisFilename))
                firstLine = firstLine[1:]  # Remove the Unicode Byte Order Marker (BOM)
            match = re.search('^; TITLE:\\s', firstLine)
            if not match:
                if BibleOrgSysGlobals.verbosityLevel > 3:
                    print("ForgeForSwordSearcherBibleFileCheck: (unexpected) first line was {!r} in {}".format(firstLine, thisFilename))
                continue
            if BibleOrgSysGlobals.debugFlag:
                print("ForgeForSwordSearcherBibleFileCheck First line got {!r} match from {!r}".format(match.group(0), firstLine))
        lastFilenameFound = thisFilename
        numFound += 1

    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ForgeForSwordSearcherBibleFileCheck got", numFound, givenFolderName, lastFilenameFound)
        if numFound != 1 or not (autoLoad or autoLoadBooks):
            return numFound
        # Exactly one found and the caller wants the object -- drop the ".txt" from the filename
        uB = ForgeForSwordSearcherBible(givenFolderName, lastFilenameFound[:-4])
        if autoLoadBooks: uB.load()  # Load and process the file
        return uB
    if looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print("    Looked hopeful but no actual files found")

    # Second try: look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(topFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(_("ForgeForSwordSearcherBibleFileCheck: {!r} subfolder is unreadable").format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    ForgeForSwordSearcherBibleFileCheck: Looking for files in {}".format(tryFolderName))

        # Collect the (non-ignored) files in this subfolder -- deeper folders are not visited
        subFiles = []
        for entryName in os.listdir(tryFolderName):
            entryPath = os.path.join(givenFolderName, thisFolderName, entryName)
            if os.path.isdir(entryPath) or not os.path.isfile(entryPath):
                continue
            upperName = entryName.upper()
            if any(upperName.endswith(ending) for ending in filenameEndingsToIgnore):
                continue
            upperExtension = os.path.splitext(upperName)[1]
            if upperExtension[1:] not in extensionsToIgnore:  # Compare without the first dot
                subFiles.append(entryName)

        # See if there's a ForgeForSwordSearcherBible here in this subfolder
        for thisFilename in sorted(subFiles):
            if not thisFilename.endswith('.txt'):
                continue
            if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
                firstLine = BibleOrgSysGlobals.peekIntoFile(thisFilename, tryFolderName)
                if firstLine is None:  # seems we couldn't decode the file
                    continue
                if firstLine[:1] == chr(65279):  # U+FEFF or \ufeff
                    logging.info("ForgeForSwordSearcherBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format(thisFilename))
                    firstLine = firstLine[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search('^; TITLE:\\s', firstLine)
                if not match:
                    if BibleOrgSysGlobals.verbosityLevel > 3:
                        print("ForgeForSwordSearcherBibleFileCheck: (unexpected) first line was {!r} in {}".format(firstLine, thisFilename))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        # NOTE(review): 'halt' is not defined in this function
                        #   -- presumably a deliberate way to stop a debugging run
                        halt
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    print("ForgeForSwordSearcherBibleFileCheck First line got type {!r} match from {!r}".format(match.group(0), firstLine))
            foundProjects.append((tryFolderName, thisFilename))
            numFound += 1

    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("ForgeForSwordSearcherBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound != 1 or not (autoLoad or autoLoadBooks):
            return numFound
        if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
        projectFolder, projectFilename = foundProjects[0]
        uB = ForgeForSwordSearcherBible(projectFolder, projectFilename[:-4])  # Remove the ".txt" from the filename
        if autoLoadBooks: uB.load()  # Load and process the file
        return uB
    # Nothing found at either level: falls off the end (returns None)
Ejemplo n.º 43
0
    def validateEntry( self, entry ):
        """
        Check/validate the given Strongs Greek lexicon entry
            and save the extracted data in self.StrongsEntries.

        entry is an XML "entry" element. Its "strongs" attribute is expected
            (only asserted when BibleOrgSysGlobals.debugFlag is set) to be a five-digit string.

        The subelements are processed in document order:
            strongs: the text is used as the key into self.StrongsEntries
                (any non-blank tail is appended to the entry string)
            greek: the first one is saved as entryResults['word']
                as a (unicode, translit, BETA) tuple
                -- any later ones are flattened and appended to the entry string
            pronunciation: the one found while still getting the essentials
                is saved as entryResults['pronunciation']
                -- for later ones only a non-blank tail is appended to the entry string
            strongs_derivation, strongs_def, kjv_def, strongsref:
                flattened (with newlines removed) and appended to the entry string
            see: appended to the entryResults['see'] list as 'G' or 'H' plus the number.
        Any other subelement is logged as an error.

        If the entry string is not empty, it is saved as entryResults['Entry']
            after any empty strongsref elements in it have been rewritten
            as <StrongsRef>Gnnn</StrongsRef> or <StrongsRef>Hnnn</StrongsRef>.

        Returns nothing.

        NOTE: The final store is keyed by the text of the strongs subelement,
            so an entry without that subelement fails at that point (unbound local variable).
        """
        if BibleOrgSysGlobals.debugFlag: assert( entry.tag == "entry" )
        BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
        BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

        # Process the entry attributes first
        strongs5 = None
        for attrib,value in entry.items():
            if attrib ==  "strongs":
                strongs5 = value
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Validating {} entry...".format( strongs5 ) )
            else: logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert( len(strongs5)==5 and strongs5.isdigit() )

        entryResults = {}
        entryString = "" # Accumulates the free-form part of the entry
        gettingEssentials = True # Cleared once a second greek element is encountered
        for j, element in enumerate( entry ):
            #print( strongs5, j, element.tag, repr(entryString) )
            if element.tag == "strongs":
                if BibleOrgSysGlobals.debugFlag: assert( gettingEssentials and j==0 and element.text )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
                # Entries 2717 and 3203..3302 are exempted from the no-tail check
                #   (presumably because those entries carry trailing text -- TODO confirm against the data).
                # This used to be written as the chained comparison 3203 > n > 3302
                #   which can never be true, so the check was never actually run.
                if strongs5!='02717' and not (3203 <= int(strongs5) <= 3302):
                    BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
                strongs = element.text # The key used when saving this entry at the end
                if BibleOrgSysGlobals.debugFlag: assert( strongs5.endswith( strongs ) )
                if element.tail and element.tail.strip(): entryString += element.tail.strip()
            elif element.tag == "greek":
                location = "greek in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
                # Process the attributes
                translit = greek = beta = None
                for attrib,value in element.items():
                    if attrib=="translit": translit = value
                    elif attrib=="unicode": greek = value
                    elif attrib=="BETA": beta = value
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if BibleOrgSysGlobals.debugFlag: assert( greek and translit and beta )
                if 'word' not in entryResults: # This is the first/main entry
                    if BibleOrgSysGlobals.debugFlag: assert( gettingEssentials and j==1 )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                    entryResults['word'] = (greek, translit, beta)
                else: # An additional greek element -- it's part of the free-form text
                    #print( "Have multiple greek entries in " + strongs5 )
                    if BibleOrgSysGlobals.debugFlag: assert( j > 2 )
                    gettingEssentials = False
                    entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ) #.replace( '\n', '' )
            elif element.tag == "pronunciation":
                location = "pronunciation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
                # Process the attributes
                pronunciation = None
                for attrib,value in element.items():
                    if attrib=="strongs": pronunciation = value
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if gettingEssentials:
                    #BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                    if BibleOrgSysGlobals.debugFlag:
                        assert( j == 2 )
                        assert( pronunciation )
                        assert( 'pronunciation' not in entryResults )
                    entryResults['pronunciation'] = pronunciation
                else: # Only the tail text of later pronunciation elements is kept
                    if BibleOrgSysGlobals.debugFlag: assert( j>2 and not gettingEssentials )
                    if element.tail and element.tail.strip(): entryString += element.tail.strip().replace( '\n', '' )
            elif element.tag == "strongs_derivation":
                location = "strongs_derivation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "derivation", repr(derivation) )
                if BibleOrgSysGlobals.debugFlag:
                    assert( derivation and '\t' not in derivation and '\n' not in derivation )
                entryString +=  derivation
            elif element.tag == "strongs_def":
                location = "strongs_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
                definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "definition", repr(definition) )
                if BibleOrgSysGlobals.debugFlag:
                    assert( definition and '\t' not in definition and '\n' not in definition )
                entryString += definition
            elif element.tag == "kjv_def":
                location = "kjv_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "8s2s" )
                #BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "dvb2" )
                KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "KJVdefinition", repr(KJVdefinition), repr(entryString) )
                if BibleOrgSysGlobals.debugFlag: assert( KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition )
                entryString += KJVdefinition
            elif element.tag == "strongsref":
                location = "strongsref in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
                strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                if BibleOrgSysGlobals.debugFlag:
                    assert( strongsRef and '\t' not in strongsRef and '\n' not in strongsRef )
                # Raw strings are used for the patterns so that \d is passed through to the regex engine as is
                strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
                strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( r'<language="HEBREW" strongs="(\d{1,5})">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( r'<strongs="(\d{1,5})" language="HEBREW">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #print( strongs5, "strongsRef", repr(strongsRef) )
                entryString += ' ' + strongsRef
            elif element.tag == "see":
                location = "see in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
                # Process the attributes
                seeLanguage = seeStrongsNumber = None
                for attrib,value in element.items():
                    if attrib == "language": seeLanguage = value
                    elif attrib == "strongs": seeStrongsNumber = value # Note: No leading zeroes here
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if BibleOrgSysGlobals.debugFlag:
                    assert( seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit() )
                    assert( seeLanguage in ('GREEK','HEBREW',) )
                if 'see' not in entryResults: entryResults['see'] = []
                # Anything that's not GREEK is saved with an H prefix
                entryResults['see'].append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )
            else: logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

        if entryString:
            #print( strongs5, "entryString", repr(entryString) )
            if BibleOrgSysGlobals.debugFlag:
                assert( '\t' not in entryString and '\n' not in entryString )
            # Rewrite any empty strongsref elements (either attribute order) into the StrongsRef form
            entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
                                r'<StrongsRef>G\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
                                r'<StrongsRef>G\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
                                r'<StrongsRef>H\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
                                r'<StrongsRef>H\1</StrongsRef>', entryString )
            if BibleOrgSysGlobals.debugFlag:
                assert( 'strongsref' not in entryString )
            entryResults['Entry'] = entryString
        #print( "entryResults", entryResults )
        self.StrongsEntries[strongs] = entryResults
Ejemplo n.º 44
0
    def load(self):
        """
        Load a single source XML file and load book elements.

        Parses the file at self.sourceFilepath, saves the result in self.XMLTree
            and resets self.suppliedMetadata['VerseView'] to an empty dict.

        If the root tag is VerseViewXMLBible.treeTag, each child element is then handled:
            revision, title, font and copyright elements are validated and their text
                is saved in self.suppliedMetadata['VerseView']
                under 'Revision', 'Title', 'Font' and 'Copyright'
            filename and sizefactor elements are validated but not saved
            book elements are validated and passed, along with a running
                book number (starting from 1), to self.__validateAndExtractBook().
            Anything else is logged as an error.
        If the root tag is something else, an error is logged and no elements are processed.

        In all cases, self.applySuppliedMetadata('VerseView')
            and self.doPostLoadProcessing() are called at the end.

        Returns nothing.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))
        # NOTE(review): assumes ElementTree is xml.etree.ElementTree.ElementTree,
        #   whose parse() returns the root element
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            assert len(self.XMLTree)  # Fail here if we didn't load anything at all

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['VerseView'] = {}

        # The simple (text-only) info elements are all validated in the same way
        #   so they're table driven: ( tag, name used in the location string,
        #                              metadata key to save the text under -- or None to not save it )
        simpleElements = (
            (VerseViewXMLBible.filenameTag, 'filename', None),
            (VerseViewXMLBible.revisionTag, 'revision', 'Revision'),
            (VerseViewXMLBible.titleTag, 'title', 'Title'),
            (VerseViewXMLBible.fontTag, 'font', 'Font'),
            (VerseViewXMLBible.copyrightTag, 'copyright', 'Copyright'),
            (VerseViewXMLBible.sizefactorTag, 'sizefactor', None),
        )

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            location = "VerseView XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                # The first matching table entry wins (same order as the tags are listed above)
                simpleMatch = next((entry for entry in simpleElements if entry[0] == element.tag), None)
                if simpleMatch is not None:
                    _tag, elementName, metadataKey = simpleMatch
                    sublocation = elementName + " in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
                    if metadataKey is not None:
                        self.suppliedMetadata['VerseView'][metadataKey] = element.text
                    elif elementName == 'sizefactor':
                        # The value isn't saved -- it's only checked when debugging
                        if BibleOrgSysGlobals.debugFlag: assert element.text == '1'
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error("xk15 Expected to find {!r} but got {!r}".format(
                        VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all expected to exist, but .get() is used so that this
            #   display code can't raise a KeyError (only when debugging or verbose)
            #   if an element was missing from the file or the root tag was wrong.
            verseViewMetadata = self.suppliedMetadata['VerseView']
            for metadataKey in ('Revision', 'Title', 'Font', 'Copyright'):
                print("{} is {!r}".format(metadataKey, verseViewMetadata.get(metadataKey)))

        self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Ejemplo n.º 45
0
    def __validateAndExtractHeader( self ):
        """
        Validate the header record (self.header) and copy its fields onto self.

        The header is expected to look like:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Haggai XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

        Every recognised child element is checked for having no tail, no attributes
            and no subelements, and then:
            compulsory fields (title, description, date, identifier, source, language)
                are always stored (after asserting they have text when debugFlag is set),
            optional fields (creator, subject, publisher, contributors, type, coverage, rights)
                are only stored when they have text ('type' is stored as self.documentType),
            'format' is only checked (when debugFlag is set), never stored, and
            'contributor' is stored as a string for the first one found,
                and as a flat list of strings once more than one has been found.
        Any other tag is logged as an error.

        Returns nothing.
        """
        if BibleOrgSysGlobals.debugFlag: assert self.header
        location = 'Header'
        BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' )
        BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' )
        BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' )

        # Map each header tag to the name of the attribute of self that its text is stored in
        compulsoryFields = { 'title':'title', 'description':'description', 'date':'date',
                            'identifier':'identifier', 'source':'source', 'language':'language' }
        optionalFields = { 'creator':'creator', 'subject':'subject', 'publisher':'publisher',
                            'contributors':'contributors', 'type':'documentType',
                            'coverage':'coverage', 'rights':'rights' }
        checkedOnlyTags = ( 'contributor', 'format' ) # These two need their own handling below

        # TODO: We probably need to rationalise some of the self.xxx stores
        for element in self.header:
            #print( "header", element.tag )
            tag = element.tag
            if tag not in compulsoryFields and tag not in optionalFields and tag not in checkedOnlyTags:
                logging.error( "Found unexpected {!r} tag in {}".format( element.tag, location ) )
                continue

            sublocation = "{} in {}".format( tag, location )
            # The contributor element has always had its own set of check codes
            if tag == 'contributor': tailCode, attributesCode, subelementsCode = 'alj1d', 'j3jjd', '5gk78'
            else: tailCode, attributesCode, subelementsCode = 'al1d', 'j3jd', '5g78'
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, tailCode )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, attributesCode )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, subelementsCode )

            if tag in compulsoryFields:
                if BibleOrgSysGlobals.debugFlag: assert element.text
                setattr( self, compulsoryFields[tag], element.text ) # Stored even if empty
            elif tag in optionalFields:
                if element.text: setattr( self, optionalFields[tag], element.text )
            elif tag == 'format':
                if BibleOrgSysGlobals.debugFlag: assert element.text
                if BibleOrgSysGlobals.debugFlag: assert element.text == 'Haggai XML Bible Markup Language'
            elif element.text: # Must be a contributor
                existingContributor = getattr( self, 'contributor', None )
                if existingContributor is None: # Must be the first (and possibly only) one
                    self.contributor = element.text
                elif isinstance( existingContributor, list ): # Third or later one -- keep the list flat
                    existingContributor.append( element.text )
                else: # Second one -- put multiples into a list
                    self.contributor = [ existingContributor, element.text ]
Ejemplo n.º 46
0
def getChangeLogFilepath(loggingFolder, projectName):
    """
    Return the path of the change-log file for the given project.

    Spaces in projectName are replaced with underscores, '_ChangeLog.txt' is
        appended, the result is passed through BibleOrgSysGlobals.makeSafeFilename,
        and that filename is then joined onto loggingFolder.
    """
    underscoredName = projectName.replace( ' ', '_' )
    safeFilename = BibleOrgSysGlobals.makeSafeFilename( underscoredName + '_ChangeLog.txt' )
    return os.path.join( loggingFolder, safeFilename )
Ejemplo n.º 47
0
def YETBibleFileCheck(givenFolderName,
                      strictCheck=True,
                      autoLoad=False,
                      autoLoadBooks=False):
    """
    Given a folder, search for YET Bible files or folders in the folder and in the next level down.

    Only files whose upper-cased extension is in filenameEndingsToAccept and
        whose name ends with '.yet' are considered. If strictCheck (or the global
        strictCheckingFlag) is set, the first line of the file must also start
        with "info" followed by a tab.

    Returns False if the given folder is unreadable or is not a folder.

    if autoLoad and autoLoadBooks are both false (default)
        returns None (nothing found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one YET Bible is found,
        returns the YETBible object (its load() is only called if autoLoadBooks is true).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("YETBibleFileCheck( {}, {}, {} )".format(givenFolderName,
                                                       strictCheck, autoLoad))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    def listFolder(folderPath):
        """Return (subfolder names, filenames with an acceptable extension) for folderPath."""
        folderNames, filenames = [], []
        for something in os.listdir(folderPath):
            somepath = os.path.join(folderPath, something)
            if os.path.isdir(somepath):
                folderNames.append(something)
            elif os.path.isfile(somepath):
                upperExtension = os.path.splitext(something.upper())[1]
                if upperExtension in filenameEndingsToAccept:
                    filenames.append(something)
        return folderNames, filenames

    def looksLikeYETBible(filename, folderPath):
        """Return True if filename (inside folderPath) passes the YET Bible checks."""
        if not filename.endswith('.yet'):
            return False
        if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
            firstLine = BibleOrgSysGlobals.peekIntoFile(filename, folderPath)
            if firstLine is None:
                return False  # seems we couldn't read/decode the file
            if not firstLine.startswith("info\t"):
                if BibleOrgSysGlobals.verbosityLevel > 2:
                    print("YETBible (unexpected) first line was {!r} in {}".
                          format(firstLine, filename))
                return False
        return True

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" YETBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = listFolder(givenFolderName)
    if '__MACOSX' in foundFolders:
        foundFolders.remove('__MACOSX')  # don't visit these directories

    # See if there's an YETBible project here in this given folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if looksLikeYETBible(thisFilename, givenFolderName):
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("YETBibleFileCheck got", numFound, givenFolderName,
                  lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            # Remove the end of the actual filename ".yet"
            uB = YETBible(givenFolderName, lastFilenameFound[:-4])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("YETBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    YETBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        # We don't descend any further, so the sub-subfolder names are not needed
        _foundSubfolders, foundSubfiles = listFolder(tryFolderName)

        # See if there's an YETBible project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if looksLikeYETBible(thisFilename, tryFolderName):
                foundProjects.append((tryFolderName, thisFilename))
                lastFilenameFound = thisFilename
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("YETBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            projectFolder, projectFilename = foundProjects[0]
            # Remove the end of the actual filename ".yet"
            uB = YETBible(projectFolder, projectFilename[:-4])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
Ejemplo n.º 48
0
    def _validate(self):
        """
        Walk the loaded XML tree and log anything that looks wrong.

        For every record whose tag is the main element tag, this checks that
            it has no text, tail or subelements, and then logs:
            an error for each compulsory attribute that is missing,
            a warning for each compulsory attribute (other than "type") that is missing or blank,
            a warning for each optional attribute that is present but blank,
            a warning for each attribute that is neither compulsory nor optional, and
            an error for each repeated value of a unique attribute (other than
                "reference_name"), compared only within that same attribute.
        Records with any other tag are logged with a warning.
        """
        assert (self._XMLtree)

        # One list of the values seen so far for each must-be-unique attribute
        seenValues = {
            "Attribute_" + uniqueName: []
            for uniqueName in self._uniqueAttributes
        }

        for recordNumber, record in enumerate(self._XMLtree):
            if record.tag != self._mainElementTag:
                logging.warning("Unexpected element: {} in record {}".format(
                    record.tag, recordNumber))
                continue

            BibleOrgSysGlobals.checkXMLNoText(record, record.tag)
            BibleOrgSysGlobals.checkXMLNoTail(record, record.tag)
            BibleOrgSysGlobals.checkXMLNoSubelements(record, record.tag)

            # Compulsory attributes: must exist, and (except for "type") must not be blank
            for name in self._compulsoryAttributes:
                value = record.get(name)
                if value is None:
                    logging.error(
                        "Compulsory {!r} attribute is missing from {} element in record {}"
                        .format(name, record.tag, recordNumber))
                if name != "type" and not value:
                    logging.warning(
                        "Compulsory {!r} attribute is blank on {} element in record {}"
                        .format(name, record.tag, recordNumber))

            # Optional attributes: may be absent, but should not be blank if present
            for name in self._optionalAttributes:
                value = record.get(name)
                if value is not None and not value:
                    logging.warning(
                        "Optional {!r} attribute is blank on {} element in record {}"
                        .format(name, record.tag, recordNumber))

            # Anything else on the element is unexpected
            for name in record.keys():
                isKnown = (name in self._compulsoryAttributes
                           or name in self._optionalAttributes)
                if not isKnown:
                    logging.warning(
                        "Additional {!r} attribute ({!r}) found on {} element in record {}"
                        .format(name, record.get(name), record.tag,
                                recordNumber))

            # Unique attributes: compared within the same field only (not across different attributes)
            for name in self._uniqueAttributes:
                value = record.get(name)
                if value is None or name == "reference_name":
                    continue
                alreadySeen = seenValues["Attribute_" + name]
                if value in alreadySeen:
                    logging.error(
                        "Found {!r} data repeated in {!r} field on {} element in record {}"
                        .format(value, name, record.tag, recordNumber))
                alreadySeen.append(value)
Ejemplo n.º 49
0
def UnboundBibleFileCheck(givenFolderName, strictCheck=True, autoLoad=False, autoLoadBooks=False):
    """
    Given a folder, search for Unbound Bible files or folders in the folder and in the next level down.

    Files matching filenameEndingsToIgnore or extensionsToIgnore are skipped.
        The remaining files are candidates if their name ends with "_utf8.txt".
        If strictCheck (or the global strictCheckingFlag) is set, the first
        line of the file must also be the Unbound Bible header line.

    Returns False if the given folder is unreadable or is not a folder.

    if autoLoad and autoLoadBooks are both false (default)
        returns None (nothing found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is true and exactly one Unbound Bible is found,
        returns the UnboundBible object (its load() is only called if autoLoadBooks is true).
    """
    if BibleOrgSysGlobals.verbosityLevel > 2:
        print("UnboundBibleFileCheck( {}, {}, {}, {} )".format(givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(_("UnboundBibleFileCheck: Given {!r} folder is unreadable").format(givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(_("UnboundBibleFileCheck: Given {!r} path is not a folder").format(givenFolderName))
        return False

    def listFolder(folderPath):
        """Return (subfolder names, filenames that are not on either ignore list) for folderPath."""
        folderNames, filenames = [], []
        for something in os.listdir(folderPath):
            somepath = os.path.join(folderPath, something)
            if os.path.isdir(somepath):
                folderNames.append(something)
            elif os.path.isfile(somepath):
                somethingUpper = something.upper()
                if any(somethingUpper.endswith(ending) for ending in filenameEndingsToIgnore):
                    continue
                upperExtension = os.path.splitext(somethingUpper)[1]
                if upperExtension[1:] not in extensionsToIgnore:  # Compare without the first dot
                    filenames.append(something)
        return folderNames, filenames

    def passesFirstLineCheck(filename, folderPath):
        """Return True unless strict checking is on and the first line is not the Unbound header."""
        if not (strictCheck or BibleOrgSysGlobals.strictCheckingFlag):
            return True
        firstLine = BibleOrgSysGlobals.peekIntoFile(filename, folderPath)
        if firstLine is None:
            return False  # seems we couldn't decode the file
        if firstLine != "#THE UNBOUND BIBLE (www.unboundbible.org)":
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print("UB (unexpected) first line was {!r} in {}".format(firstLine, filename))
            return False
        return True

    # Find all the files and folders in this folder
    if BibleOrgSysGlobals.verbosityLevel > 3:
        print(" UnboundBibleFileCheck: Looking for files in given {}".format(givenFolderName))
    foundFolders, foundFiles = listFolder(givenFolderName)
    if "__MACOSX" in foundFolders:
        foundFolders.remove("__MACOSX")  # don't visit these directories

    # See if there's an UnboundBible project here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename in ("book_names.txt", "Readme.txt"):
            looksHopeful = True
        elif thisFilename.endswith("_utf8.txt"):
            if not passesFirstLineCheck(thisFilename, givenFolderName):
                continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck got", numFound, givenFolderName, lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            # Remove the end of the actual filename "_utf8.txt"
            uB = UnboundBible(givenFolderName, lastFilenameFound[:-9])
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        print("    Looked hopeful but no actual files found")

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + "/")
        if not os.access(tryFolderName, os.R_OK):  # The subfolder is not readable
            logging.warning(_("UnboundBibleFileCheck: {!r} subfolder is unreadable").format(tryFolderName))
            continue
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print("    UnboundBibleFileCheck: Looking for files in {}".format(tryFolderName))
        # We don't descend any further, so the sub-subfolder names are not needed
        _foundSubfolders, foundSubfiles = listFolder(tryFolderName)

        # See if there's an UB project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if thisFilename.endswith("_utf8.txt") and passesFirstLineCheck(thisFilename, tryFolderName):
                foundProjects.append((tryFolderName, thisFilename))
                lastFilenameFound = thisFilename
                numFound += 1
    if numFound:
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("UnboundBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag:
                assert len(foundProjects) == 1
            projectFolder, projectFilename = foundProjects[0]
            # Remove the end of the actual filename "_utf8.txt"
            uB = UnboundBible(projectFolder, projectFilename[:-9])
            if autoLoadBooks:
                uB.load()  # Load and process the file
            return uB
        return numFound
Ejemplo n.º 50
0
def demo():
    """
    Demonstrate reading and checking some Bible databases.

    The first section (enabled) runs BCVBibleFileCheck on the test folder
        three times: with the defaults, with autoLoad, and with autoLoad plus
        autoLoadBooks. The returned objects are checked if strictCheckingFlag
        is set, and the last one is exported if the export command line
        argument was given.
    The two remaining sections are disabled (if 0): one runs testBCV over every
        subfolder of the test folder, the other loads a fixed list of test versions.
    """
    def report(threshold, *items):
        """Print the items only if the global verbosity level is above threshold."""
        if BibleOrgSysGlobals.verbosityLevel > threshold:
            print(*items)

    report(0, ProgNameVersion)

    #testFolder = "Tests/DataFilesForTests/BCVTest1/"
    testFolder = "OutputFiles/BOS_BCV_Export/"

    if 1:  # demo the file checking code -- first with the whole folder and then with only one folder
        report(0, "\nBCV TestA1")
        checkResult = BCVBibleFileCheck(testFolder)
        report(1, "BCV TestA1", checkResult)

        report(0, "\nBCV TestA2")
        # autoLoad alone doesn't preload the books
        autoLoadResult = BCVBibleFileCheck(testFolder, autoLoad=True)
        report(1, "BCV TestA2", autoLoadResult)
        if BibleOrgSysGlobals.strictCheckingFlag:
            autoLoadResult.check()
            autoLoadErrors = autoLoadResult.getErrors()

        report(0, "\nBCV TestA3")
        fullLoadResult = BCVBibleFileCheck(testFolder, autoLoad=True, autoLoadBooks=True)
        report(1, "BCV TestA3", fullLoadResult)
        if BibleOrgSysGlobals.strictCheckingFlag:
            fullLoadResult.check()
            fullLoadErrors = fullLoadResult.getErrors()
        if BibleOrgSysGlobals.commandLineArguments.export:
            fullLoadResult.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)

    if 0:  # all discovered modules in the test folder
        foundFolders, foundFiles = [], []
        for entryName in os.listdir(testFolder):
            entryPath = os.path.join(testFolder, entryName)
            if os.path.isdir(entryPath):
                foundFolders.append(entryName)
            elif os.path.isfile(entryPath):
                foundFiles.append(entryName)

        if BibleOrgSysGlobals.maxProcesses > 1:  # Get our subprocesses ready and waiting for work
            report(1, "\nTrying all {} discovered modules…".format(len(foundFolders)))
            parameters = sorted(foundFolders)
            BibleOrgSysGlobals.alreadyMultiprocessing = True
            with multiprocessing.Pool(processes=BibleOrgSysGlobals.maxProcesses) as pool:  # start worker processes
                results = pool.map(testBCV, parameters)  # have the pool do our loads
                # Results (all None) are actually irrelevant to us here
                assert len(results) == len(parameters)
            BibleOrgSysGlobals.alreadyMultiprocessing = False
        else:  # Just single threaded
            for folderNumber, someFolder in enumerate(sorted(foundFolders), start=1):
                report(1, "\nBCV D{}/ Trying {}".format(folderNumber, someFolder))
                testBCV(someFolder)

    if 0:  # Load and process some of our test versions
        testVersions = (
            ("Matigsalug", 'utf-8', "Tests/DataFilesForTests/BCVTest1/"),
            ("Matigsalug", 'utf-8', "Tests/DataFilesForTests/BCVTest2/"),
            ("Exported", 'utf-8', "Tests/BOS_BCV_Export/"),
        )
        for count, (name, encoding, versionFolder) in enumerate(testVersions, start=1):
            if not os.access(versionFolder, os.R_OK):
                print(
                    "\nSorry, test folder {!r} is not readable on this computer."
                    .format(versionFolder))
                continue

            report(0, "\nBCV A{}/".format(count))
            bcvB = BCVBible(versionFolder, name, encoding=encoding)
            bcvB.load()
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Gen assumed book name:", repr(bcvB.getAssumedBookName('GEN')))
                print("Gen long TOC book name:", repr(bcvB.getLongTOCName('GEN')))
                print("Gen short TOC book name:", repr(bcvB.getShortTOCName('GEN')))
                print("Gen book abbreviation:", repr(bcvB.getBooknameAbbreviation('GEN')))
            report(0, bcvB)
            if BibleOrgSysGlobals.strictCheckingFlag:
                bcvB.check()
                versionErrors = bcvB.getErrors()
            if BibleOrgSysGlobals.commandLineArguments.export:
                bcvB.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)
                pickleFilename = BibleOrgSysGlobals.makeSafeFilename(name) + '.pickle'
                pickleFolder = os.path.join("OutputFiles/", "BOS_Bible_Object_Pickle/")
                newObj = BibleOrgSysGlobals.unpickleObject(pickleFilename, pickleFolder)
                report(0, "newObj is", newObj)
Ejemplo n.º 51
0
    def __load(self, XMLFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        Parameters:
            XMLFilepath: path of the XML file to parse (must not be empty).

        On return, self._XMLtree holds the parsed root element. If the first
            child carries the expected header tag, it is removed from the tree
            and saved in self.XMLheader, and the version/date/title of its
            single "work" subelement are saved in self.ProgVersion,
            self.dateString and self.titleString.

        Structural problems are logged rather than raised, but the assertions
            fail if no filepath is given, if a tree has already been loaded,
            or if the parsed root has no child elements.
        """
        assert XMLFilepath
        self.__XMLFilepath = XMLFilepath
        # Make sure we're not doing this twice
        assert self._XMLtree is None or len(self._XMLtree) == 0

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                _("Loading BibleReferencesLinks XML file from {!r}...").format(
                    self.__XMLFilepath))
        self._XMLtree = ElementTree().parse(self.__XMLFilepath)
        # Fail here if we didn't load anything at all
        # (explicit test because the truth value of an Element is deprecated)
        assert self._XMLtree is not None and len(self._XMLtree) > 0

        if self._XMLtree.tag != self._treeTag:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self._treeTag, self._XMLtree.tag))
            return

        header = self._XMLtree[0]
        if header.tag == self._headerTag:
            self.XMLheader = header
            self._XMLtree.remove(header)
            BibleOrgSysGlobals.checkXMLNoText(header, "header")
            BibleOrgSysGlobals.checkXMLNoTail(header, "header")
            BibleOrgSysGlobals.checkXMLNoAttributes(header, "header")
            if len(header) > 1:
                logging.info(_("Unexpected elements in header"))
            elif len(header) == 0:
                logging.info(_("Missing work element in header"))
            else:
                work = header[0]
                BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    work, "work in header")
                if work.tag == "work":
                    self.ProgVersion = work.find("version").text
                    self.dateString = work.find("date").text
                    self.titleString = work.find("title").text
                else:
                    logging.warning(_("Missing work element in header"))
        else:
            # Look up the translation of the template first, then fill it in
            logging.warning(
                _("Missing header element (looking for {!r} tag)").format(
                    self._headerTag))
        # Report the tail of the first child, whether or not it was the header
        if header.tail is not None and header.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after header").format(
                    header.tail))
Ejemplo n.º 52
0
    def validateEntry( self, entry ):
        """
        Check/validate the given Strongs Greek lexicon entry and save the extracted data.

        Parameters:
            entry: the XML "entry" element to process.

        The extracted fields are collected into a dictionary which may contain
            'word': a 3-tuple (unicode Greek, transliteration, beta code) from the first greek element
            'pronunciation': the strongs attribute of the first pronunciation element
            'see': a list of referenced Strongs numbers prefixed with G or H
            'Entry': the remaining text flattened into a single string.
        The dictionary is saved in self.StrongsEntries using the text of the strongs subelement as the key.

        Unexpected attributes and elements are logged rather than raised.
        If there is no usable strongs subelement, an error is logged and nothing is saved.
        """
        if BibleOrgSysGlobals.debugFlag: assert entry.tag == "entry"
        BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
        BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

        # Process the entry attributes first
        strongs5 = None
        for attrib,value in entry.items():
            if attrib ==  "strongs":
                strongs5 = value
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Validating {} entry…".format( strongs5 ) )
            else: logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert len(strongs5)==5 and strongs5.isdigit()

        strongs = None # Will hold the dictionary key (taken from the strongs subelement)
        entryResults = {}
        entryString = ""
        gettingEssentials = True # True until we encounter a second greek element
        for j, element in enumerate( entry ):
            #print( strongs5, j, element.tag, repr(entryString) )
            if element.tag == "strongs":
                if BibleOrgSysGlobals.debugFlag: assert gettingEssentials and j==0 and element.text
                BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
                # Entries 2717 and 3203..3302 are exempted from the no-tail check
                # NOTE(review): presumably these numbers carry explanatory text after the strongs element -- confirm against the data
                if strongs5!='02717' and not (3203 <= int(strongs5) <= 3302):
                    BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
                strongs = element.text
                if BibleOrgSysGlobals.debugFlag: assert strongs5.endswith( strongs )
                if element.tail and element.tail.strip(): entryString += element.tail.strip()
            elif element.tag == "greek":
                location = "greek in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
                # Process the attributes
                translit = greek = beta = None
                for attrib,value in element.items():
                    if attrib=="translit": translit = value
                    elif attrib=="unicode": greek = value
                    elif attrib=="BETA": beta = value
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if BibleOrgSysGlobals.debugFlag: assert greek and translit and beta
                if 'word' not in entryResults: # This is the first/main entry
                    if BibleOrgSysGlobals.debugFlag: assert gettingEssentials and j==1
                    BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                    entryResults['word'] = (greek, translit, beta)
                else: # Any further greek elements are part of the running text
                    #print( "Have multiple greek entries in " + strongs5 )
                    if BibleOrgSysGlobals.debugFlag: assert j > 2
                    gettingEssentials = False
                    entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ) #.replace( '\n', '' )
            elif element.tag == "pronunciation":
                location = "pronunciation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
                # Process the attributes
                pronunciation = None
                for attrib,value in element.items():
                    if attrib=="strongs": pronunciation = value
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if gettingEssentials:
                    #BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                    if BibleOrgSysGlobals.debugFlag:
                        assert j == 2
                        assert pronunciation
                        assert 'pronunciation' not in entryResults
                    entryResults['pronunciation'] = pronunciation
                else: # Only the tail text of later pronunciation elements is kept
                    if BibleOrgSysGlobals.debugFlag: assert j>2 and not gettingEssentials
                    if element.tail and element.tail.strip(): entryString += element.tail.strip().replace( '\n', '' )
            elif element.tag == "strongs_derivation":
                location = "strongs_derivation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "derivation", repr(derivation) )
                if BibleOrgSysGlobals.debugFlag:
                    assert derivation and '\t' not in derivation and '\n' not in derivation
                entryString +=  derivation
            elif element.tag == "strongs_def":
                location = "strongs_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
                definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "definition", repr(definition) )
                if BibleOrgSysGlobals.debugFlag:
                    assert definition and '\t' not in definition and '\n' not in definition
                entryString += definition
            elif element.tag == "kjv_def":
                location = "kjv_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "8s2s" )
                #BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "dvb2" )
                KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                #print( strongs5, "KJVdefinition", repr(KJVdefinition), repr(entryString) )
                if BibleOrgSysGlobals.debugFlag: assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
                entryString += KJVdefinition
            elif element.tag == "strongsref":
                location = "strongsref in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
                strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
                if BibleOrgSysGlobals.debugFlag:
                    assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
                # Raw strings so that \d reaches the regex engine without an invalid-escape warning
                strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
                strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( '<language="HEBREW" strongs="(\d{1,5})">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( '<strongs="(\d{1,5})" language="HEBREW">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #print( strongs5, "strongsRef", repr(strongsRef) )
                entryString += ' ' + strongsRef
            elif element.tag == "see":
                location = "see in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
                # Process the attributes
                seeLanguage = seeStrongsNumber = None
                for attrib,value in element.items():
                    if attrib == "language": seeLanguage = value
                    elif attrib == "strongs": seeStrongsNumber = value # Note: No leading zeroes here
                    else: logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
                if BibleOrgSysGlobals.debugFlag:
                    assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                    assert seeLanguage in ('GREEK','HEBREW',)
                if 'see' not in entryResults: entryResults['see'] = []
                entryResults['see'].append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )
            else: logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

        if entryString:
            #print( strongs5, "entryString", repr(entryString) )
            if BibleOrgSysGlobals.debugFlag:
                assert '\t' not in entryString and '\n' not in entryString
            # Convert any embedded (flattened) strongsref elements to our StrongsRef form
            entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
                                r'<StrongsRef>G\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
                                r'<StrongsRef>G\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
                                r'<StrongsRef>H\1</StrongsRef>', entryString )
            entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
                                r'<StrongsRef>H\1</StrongsRef>', entryString )
            if BibleOrgSysGlobals.debugFlag:
                assert 'strongsref' not in entryString
            entryResults['Entry'] = entryString
        #print( "entryResults", entryResults )
        if strongs is None: # We have no key to save this entry under
            logging.error( "Missing strongs element (or number) in entry {}".format( strongs5 ) )
            return
        self.StrongsEntries[strongs] = entryResults
Ejemplo n.º 53
0
    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)

        Returns a 2-tuple (dataList, dataDict) where
            dataList is a list of 4-tuples
                (sourceReference, sourceComponent, parsedSourceReference, actualLinksList)
                and each actualLinksList entry is a 4-tuple
                (targetReference, targetComponent, parsedTargetReference, linkType)
            dataDict maps each verse key included in a source reference to a list of such 4-tuples,
                plus reversed entries (target -> source) under each verse key included in a target reference.

        The results are cached, so repeated calls return the same containers.

        Raises TypeError if a reference marked as 'Section' or 'Verses' parses as a simple verse key,
            and ValueError if a link type has no known reverse link type.
        """
        def checkSimpleVerseKey(reference, component):
            """
            Sanity check that the reference string matches its declared component type.

            A 'Verse' reference must be accepted by SimpleVerseKey (any exception propagates),
                whereas any other component is expected to be rejected by it with a TypeError.
            """
            if component == 'Verse':
                SimpleVerseKey(reference)
                return
            try:
                SimpleVerseKey(reference, ignoreParseErrors=True)
            except TypeError:
                return  # This should happen coz it should fail the SVK
            logging.error("{} {!r} failed!".format(component, reference))
            raise TypeError("{} {!r} unexpectedly parsed as a simple verse key".format(
                component, reference))

        # end of checkSimpleVerseKey

        # The link type to record on the reversed (target -> source) link
        reverseLinkTypes = {
            'TSK': 'TSKQuoted',
            'QuotedOTReference': 'OTReferenceQuoted',
            'AlludedOTReference': 'OTReferenceAlluded',
            'PossibleOTReference': 'OTReferencePossible',
        }

        # (explicit test because the truth value of an Element is deprecated)
        assert self._XMLtree is not None and len(self._XMLtree) > 0
        if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        # First pass: pull the raw strings out of the XML tree
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLtree:
            #print( BibleOrgSysGlobals.elementStr( element ) )

            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in ('Section', 'Verses', 'Verse')

            BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
            BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference,
                                                    'kd21')
            BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

            actualRawLinksList = []
            for subelement in element:
                #print( BibleOrgSysGlobals.elementStr( subelement ) )
                if subelement.tag in ('sourceReference', 'sourceComponent'):
                    # already processed these
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'ls12')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sourceReference, 'ks02')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'sqw1')

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText(subelement,
                                                      sourceReference, 'haw9')
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'hs19')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'jsd9')

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in ('Section', 'Verses', 'Verse')
                    linkType = subelement.find('linkType').text
                    assert linkType in reverseLinkTypes

                    actualRawLinksList.append(
                        (targetReference, targetComponent, linkType))
                    actualLinkCount += 1

            rawRefLinkList.append(
                (sourceReference, sourceComponent, actualRawLinksList))

        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  {} raw links loaded (with {} actual raw link entries)".
                  format(len(rawRefLinkList), actualLinkCount))

        # Second pass: check and parse the reference strings
        myRefLinkList = []
        actualLinkCount = 0
        # NOTE(review): BOS is not used below -- kept in case constructing it has needed side effects
        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        for j, (sourceReference, sourceComponent,
                actualRawLinksList) in enumerate(rawRefLinkList):
            # Just do some testing first
            checkSimpleVerseKey(sourceReference, sourceComponent)
            # Now do the actual parsing
            parsedSourceReference = FlexibleVersesKey(sourceReference)
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print(j, sourceComponent, sourceReference,
                      parsedSourceReference)
                #assert( parsedSourceReference.getShortText().replace(' ','_') == sourceReference )
            actualLinksList = []
            for targetReference, targetComponent, linkType in actualRawLinksList:
                # Just do some testing first
                checkSimpleVerseKey(targetReference, targetComponent)
                # Now do the actual parsing
                try:
                    parsedTargetReference = FlexibleVersesKey(targetReference)
                except TypeError:
                    print(
                        "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                        .format(targetReference))
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print(' ', targetComponent, targetReference,
                          parsedTargetReference)
                    #assert( parsedTargetReference.getShortText().replace(' ','_',1) == targetReference )

                actualLinksList.append((targetReference, targetComponent,
                                        parsedTargetReference, linkType))
                actualLinkCount += 1

            myRefLinkList.append((sourceReference, sourceComponent,
                                  parsedSourceReference, actualLinksList))

        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("  {} links processed (with {} actual link entries)".format(
                len(rawRefLinkList), actualLinkCount))
        #print( myRefLinkList ); halt
        self.__DataList = myRefLinkList

        # Now put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        myRefLinkDict = {}
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for verseRef in parsedSourceReference.getIncludedVerses():
                #print( verseRef )
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append(
                    (sourceReference, sourceComponent, parsedSourceReference,
                     actualLinksList))
            #print( myRefLinkDict ); halt
        originalLinks = len(myRefLinkDict)
        print(
            "  {} verse links added to dictionary (includes filling out spans)"
            .format(originalLinks))
        #print( myRefLinkDict ); halt

        # Create a reversed link dictionary (by verse key)
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
                if parsedTargetReference is None:
                    continue  # This target couldn't be parsed above
                for verseRef in parsedTargetReference.getIncludedVerses():
                    #print( verseRef )
                    assert isinstance(verseRef, SimpleVerseKey)
                    reverseLinkType = reverseLinkTypes.get(linkType)
                    if reverseLinkType is None:  # Have a new linkType!
                        raise ValueError(
                            "Unknown link type {!r}".format(linkType))
                    myRefLinkDict.setdefault(verseRef, []).append(
                        (targetReference, targetComponent,
                         parsedTargetReference, [
                             (sourceReference, sourceComponent,
                              parsedSourceReference, reverseLinkType)
                         ]))
            #print( myRefLinkDict ); halt
        totalLinks = len(myRefLinkDict)
        # This counts the verse keys which only exist because of a reversed link
        reverseLinks = totalLinks - originalLinks
        print("  {} reverse links added to dictionary to give {} total".format(
            reverseLinks, totalLinks))
        #print( myRefLinkDict ); halt

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        mostReferences = totalReferences = 0
        mostVerseRef = None  # Stays None if the dictionary is empty
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len(entryList)
            if numRefs > mostReferences:
                mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        if mostVerseRef is not None:
            print("  {} maximum links for any one reference ({})".format(
                mostReferences, mostVerseRef.getShortText()))
        print("  {} total links for all references".format(totalReferences))

        return self.__DataList, self.__DataDict
    def __validateSystem(self, bookOrderTree, systemName):
        """
        Do a semi-automatic check of the XML file validity.

        Parameters:
            bookOrderTree: the (non-empty) XML element whose children are the records to check.
            systemName: name of the system being checked (only used in log messages).

        Each child with the main element tag is checked for
            an "id" attribute that ascends from 1 in steps of 1,
            text that is not repeated in another record,
            the compulsory, optional and unique attributes and subelements listed on self.
        Problems are logged (as errors or warnings) rather than raised, and nothing is returned.
        """
        assert bookOrderTree

        # Values already seen, for each field that must contain unique data
        uniqueDict = {}
        for elementName in self.uniqueElements:
            uniqueDict["Element_" + elementName] = set()
        for attributeName in self.uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = set()
        seenTexts = set()  # Texts of the main elements already processed

        expectedID = 1
        for k, element in enumerate(bookOrderTree):
            if element.tag != self.mainElementTag:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))
                continue

            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check ascending ID field
            ID = element.get("id")
            try:
                intID = int(ID)
            except (TypeError, ValueError):  # The id is missing or isn't a number
                logging.error(
                    _("Missing or non-numeric ID {!r} in record {} (expecting {}) for {}"
                      ).format(ID, k, expectedID, systemName))
            else:
                if intID != expectedID:
                    logging.error(
                        _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}"
                          ).format(k, intID, expectedID, systemName))
            expectedID += 1

            # Check that this is unique
            if element.text:
                if element.text in seenTexts:
                    logging.error(
                        _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {}) for {}"
                          ).format(element.text, element.tag, ID, k,
                                   systemName))
                seenTexts.add(element.text)

            # Check compulsory attributes on this main element
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}"
                          ).format(attributeName, element.tag, k))
                elif not attributeValue:
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                          ).format(attributeName, attributeValue,
                                   element.tag, k))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    seenValues = uniqueDict["Attribute_" + attributeName]
                    if attributeValue in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, k))
                    seenValues.add(attributeValue)

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))
                elif not foundElement.text:
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None and not foundElement.text:
                    logging.warning(
                        _("Optional {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                          ).format(subelement.tag, subelement.text, ID, k))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    seenValues = uniqueDict["Element_" + elementName]
                    if text in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                              ).format(text, elementName, ID, k))
                    seenValues.add(text)
Ejemplo n.º 55
0
def getChangeLogFilepath( loggingFolder, projectName ):
    """
    Return the filepath of the change-log file for the given project.

    Spaces in projectName are replaced with underscores, '_ChangeLog.txt'
        is appended, the result is passed through
        BibleOrgSysGlobals.makeSafeFilename(), and that filename is joined
        onto loggingFolder.
    """
    changeLogName = projectName.replace(' ','_') + '_ChangeLog.txt'
    safeFilename = BibleOrgSysGlobals.makeSafeFilename( changeLogName )
    return os.path.join( loggingFolder, safeFilename )
Ejemplo n.º 56
0
    def _validateSystem(self, punctuationTree, systemName):
        """
        Check one loaded punctuation-system XML tree for structural problems.

        Each child of punctuationTree whose tag is listed in
        self.mainElementTags is treated as a record and checked against the
        compulsory / optional / unique attribute and element lists held on
        self. Any other child is reported as unexpected. All findings are
        reported through the logging module; nothing is returned.

        Parameters:
            punctuationTree: XML element whose children are the records
                (must be truthy -- enforced by an assert).
            systemName: name of the system being checked (not used here).
        """
        assert punctuationTree

        # One list of already-seen values per unique element / attribute name
        uniqueDict = {}
        for elementName in self.uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self.uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        for k, element in enumerate(punctuationTree):
            if element.tag not in self.mainElementTags:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))
                continue

            # The subelement messages below name the record by an ID, but no ID
            #   is extracted anywhere in this method (an undefined name used to
            #   be referenced here), so the record's own tag is used instead.
            ID = element.tag

            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check compulsory attributes on this main element
            #   (a missing attribute is reported once as missing, not also as blank)
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}"
                          ).format(attributeName, element.tag, k))
                elif not attributeValue:
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                          ).format(attributeName, attributeValue,
                                   element.tag, k))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    seenValues = uniqueDict["Attribute_" + attributeName]
                    if attributeValue in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, k))
                    seenValues.append(attributeValue)

            # Check compulsory elements
            #   (find() returns None for a missing element, so .text must only
            #    be read once we know the element is there)
            for elementName in self.compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))
                elif not foundElement.text:
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None and not foundElement.text:
                    logging.warning(
                        _("Optional {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                          ).format(subelement.tag, subelement.text, ID, k))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    seenValues = uniqueDict["Element_" + elementName]
                    if text in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                              ).format(text, elementName, ID, k))
                    seenValues.append(text)
Ejemplo n.º 57
0
def demo():
    """
    Demonstrate loading and checking some ESFM Bibles.

    The first (enabled) section loads each listed test project, prints a few
    Genesis book-name lookups, and then runs the checks and/or the exports
    depending on the BibleOrgSysGlobals flags.
    The second section (disabled with `if 0`) does the load/check/export
    steps for every subfolder of a test base folder.
    """
    if BibleOrgSysGlobals.verbosityLevel > 0: print(ProgNameVersion)

    if 1:  # Load and process some of our test versions
        testProjects = (  # name, abbreviation, folder
            (
                "Open English Translation—Literal Version",
                "OET-LV",
                "../../../../../Data/Work/Matigsalug/Bible/OET-LV/",
            ),
            #("Matigsalug", "MBTV", "../../../../../Data/Work/Matigsalug/Bible/MBTV/",),
            #("ESFM Test 1", "OET-LV", "Tests/DataFilesForTests/ESFMTest1/"),
            #("ESFM Test 2", "OET-RV", "Tests/DataFilesForTests/ESFMTest2/"),
            #("All Markers Project", "WEB+", "Tests/DataFilesForTests/USFMAllMarkersProject/"),
            #("USFM Error Project", "UEP", "Tests/DataFilesForTests/USFMErrorProject/"),
            #("BOS Exported Files", "Exported", "Tests/BOS_USFM_Export/"),
        )
        for count, (name, abbreviation, testFolder) in enumerate(testProjects, start=1):
            if not os.access(testFolder, os.R_OK):
                print(
                    "\nSorry, test folder {!r} is not readable on this computer."
                    .format(testFolder))
                continue

            if BibleOrgSysGlobals.verbosityLevel > 0:
                print("\nESFM A{}/".format(count))
            EsfmB = ESFMBible(testFolder, name, abbreviation)
            EsfmB.load()

            # Show the various names that the loaded Bible has for Genesis
            assumedName = EsfmB.getAssumedBookName('GEN')
            print("Gen assumed book name:", repr(assumedName))
            longTOCName = EsfmB.getLongTOCName('GEN')
            print("Gen long TOC book name:", repr(longTOCName))
            shortTOCName = EsfmB.getShortTOCName('GEN')
            print("Gen short TOC book name:", repr(shortTOCName))
            bookAbbreviation = EsfmB.getBooknameAbbreviation('GEN')
            print("Gen book abbreviation:", repr(bookAbbreviation))

            if BibleOrgSysGlobals.verbosityLevel > 0: print(EsfmB)
            if BibleOrgSysGlobals.strictCheckingFlag:
                EsfmB.check()
                #print( EsfmB.books['GEN']._processedLines[0:40] )
                EsfmBErrors = EsfmB.getErrors()  # Result is not used further here
                # print( UBErrors )
            if BibleOrgSysGlobals.commandLineOptions.export:
                ##EsfmB.toDrupalBible()
                EsfmB.doAllExports(wantPhotoBible=False,
                                   wantODFs=True,
                                   wantPDFs=True)
                # Try reading back a pickle named after the abbreviation
                pickleFilename = BibleOrgSysGlobals.makeSafeFilename(abbreviation) + '.pickle'
                pickleFolder = os.path.join("OutputFiles/", "BOS_Bible_Object_Pickle/")
                newObj = BibleOrgSysGlobals.unpickleObject(pickleFilename, pickleFolder)
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print("newObj is", newObj)

    if 0:  # Test a whole folder full of folders of ESFM Bibles
        testBaseFolder = "Tests/DataFilesForTests/theWordRoundtripTestFiles/"

        def findInfo(somepath):
            """
            Find out info about the project from the included copyright.htm file.

            Returns None if there's no such file in somepath,
                otherwise a 2-tuple: (title or None, dictionary of BBB -> name).
            """
            cFilepath = os.path.join(somepath, "copyright.htm")
            if not os.path.exists(cFilepath): return
            title, nameDict = None, {}
            with open(cFilepath, encoding='utf-8') as myFile:  # Automatically closes the file when done
                for lineCount, line in enumerate(myFile, start=1):
                    if lineCount == 1 and line.startswith('\ufeff'):  #U+FEFF
                        logging.info(
                            "ESFMBible: Detected UTF-16 Byte Order Marker in copyright.htm file"
                        )
                        line = line[1:]  # Remove the Byte Order Marker
                    if line[-1] == '\n':
                        line = line[:-1]  # Removing trailing newline character
                    if not line: continue  # Just discard blank lines
                    if line.startswith("<title>"):
                        withoutOpenTag = line.replace("<title>", "")
                        title = withoutOpenTag.replace("</title>", "").strip()
                    if line.startswith('<option value="'):
                        withoutOpenTag = line.replace('<option value="', '')
                        adjLine = withoutOpenTag.replace('</option>', '')
                        # First three characters are the book code; the name starts at offset 11
                        ESFM_BBB = adjLine[:3]
                        name = adjLine[11:]
                        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromESFM(
                            ESFM_BBB)
                        #print( ESFM_BBB, BBB, name )
                        nameDict[BBB] = name
            return title, nameDict

        # end of findInfo

        count = totalBooks = 0
        if not os.access(testBaseFolder, os.R_OK):  # check that we can read the test data
            print(
                "\nSorry, test folder {!r} is not readable on this computer.".
                format(testBaseFolder))
        else:
            for something in sorted(os.listdir(testBaseFolder)):
                somepath = os.path.join(testBaseFolder, something)
                if os.path.isfile(somepath):
                    print("Ignoring file {!r} in {!r}".format(
                        something, testBaseFolder))
                    continue
                if not os.path.isdir(somepath):
                    continue

                # Let's assume that it's a folder containing a ESFM (partial) Bible
                #if not something.startswith( 'ssx' ): continue # This line is used for debugging only specific modules
                count += 1
                title = None
                findInfoResult = findInfo(somepath)
                if findInfoResult: title, bookNameDict = findInfoResult
                if title is None:  # Fall back to the folder name (less any _usfm suffix)
                    title = something[:-5] if something.endswith(
                        "_usfm") else something
                name, testFolder = title, somepath
                if not os.access(testFolder, os.R_OK):
                    print(
                        "\nSorry, test folder {!r} is not readable on this computer."
                        .format(testFolder))
                    continue

                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print("\nESFM B{}/".format(count))
                EsfmB = ESFMBible(testFolder, name)
                EsfmB.load()
                if BibleOrgSysGlobals.verbosityLevel > 0: print(EsfmB)
                if BibleOrgSysGlobals.strictCheckingFlag:
                    EsfmB.check()
                    EsfmBErrors = EsfmB.getErrors()  # Result is not used further here
                    #print( EsfmBErrors )
                if BibleOrgSysGlobals.commandLineOptions.export:
                    EsfmB.doAllExports(wantPhotoBible=False,
                                       wantODFs=False,
                                       wantPDFs=False)

            if count:
                print("\n{} total ESFM (partial) Bibles processed.".format(
                    count))
            if totalBooks:
                averagePerFolder = round(totalBooks / count)
                print("{} total books ({} average per folder)".format(
                    totalBooks, averagePerFolder))
Ejemplo n.º 58
0
        def loadParagraph( paragraphXML, paragraphlocation ):
            """ Load a paragraph from the USX XML.
                Uses (and updates) c,v information from the containing function. """
            nonlocal c, v

            # Process the attributes first
            paragraphStyle = None
            for attrib,value in paragraphXML.items():
                if attrib=='style':
                    paragraphStyle = value # This is basically the USFM marker name
                else:
                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )

            # Now process the paragraph text (or write a paragraph marker anyway)
            self.addLine( paragraphStyle, paragraphXML.text if paragraphXML.text and paragraphXML.text.strip() else '' )

            # Now process the paragraph subelements
            for element in paragraphXML:
                location = element.tag + ' ' + paragraphlocation
                #print( "USXXMLBibleBook.load", c, v, element.tag, location )
                if element.tag == 'verse': # milestone (not a container)
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    verseStyle = None
                    for attrib,value in element.items():
                        if attrib=='number':
                            v = value
                        elif attrib=='style':
                            verseStyle = value
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    if verseStyle != 'v':
                        logging.warning( _("Unexpected style attribute ({}) in {}").format( verseStyle, location ) )
                    self.addLine( verseStyle, v + ' ' )
                    # Now process the tail (if there's one) which is the verse text
                    if element.tail:
                        vText = element.tail.strip()
                        if vText:
                            #print( repr(vText) )
                            self.appendToLastLine( vText )
                elif element.tag == 'char':
                    # Process the attributes first
                    charStyle = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            charStyle = value # This is basically the USFM character marker name
                            #print( "  charStyle", charStyle )
                            assert( not BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( charStyle ) )
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    charLine = "\\{} {} ".format( charStyle, element.text )
                    # Now process the subelements -- chars are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( c, v, element.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            subCharStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style': subCharStyle = value
                                elif attrib=='closed':
                                    assert( value=='false' )
                                    charClosed = False
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            charLine += "\\{} {}".format( subCharStyle, subelement.text )
                            if charClosed: charLine += "\\{}*".format( subCharStyle )
                            charLine += '' if subelement.tail is None else subelement.tail.strip()
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                            self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                    # A character field must be added to the previous field
                    charLine += "\\{}*{}".format( charStyle, '' if element.tail is None else element.tail.strip() )
                    if debuggingThisModule: print( "USX.loadParagraph:", c, v, paragraphStyle, charStyle, repr(charLine) )
                    self.appendToLastLine( charLine )
                elif element.tag == 'note':
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    # Process the attributes first
                    noteStyle = noteCaller = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            noteStyle = value # This is basically the USFM marker name
                            assert( noteStyle in ('x','f',) )
                        elif attrib=='caller':
                            noteCaller = value # Usually hyphen or a symbol to be used for the note
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    assert( noteStyle and noteCaller ) # both compulsory
                    noteLine = "\\{} {} ".format( noteStyle, noteCaller )
                    # Now process the subelements -- notes are one of the few multiply embedded fields in USX
                    for subelement in element:
                        sublocation = subelement.tag + ' ' + location
                        #print( c, v, element.tag )
                        if subelement.tag == 'char': # milestone (not a container)
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            charStyle, charClosed = None, True
                            for attrib,value in subelement.items():
                                if attrib=='style':
                                    charStyle = value
                                elif attrib=='closed':
                                    assert( value=='false' )
                                    charClosed = False
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            noteLine += "\\{} {}".format( charStyle, subelement.text )
                            if charClosed: noteLine += "\\{}*".format( charStyle )
                            noteLine += '' if subelement.tail is None else subelement.tail.strip()
                        elif subelement.tag == 'unmatched': # Used to denote errors in the source text
                            BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation )
                            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation )
                            # Process the attributes first
                            unmmatchedMarker = None
                            for attrib,value in subelement.items():
                                if attrib=='marker':
                                    unmmatchedMarker = value
                                else:
                                    logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                            self.addPriorityError( 2, c, v, _("Unmatched subelement for {} in {}").format( repr(unmmatchedMarker), sublocation) if unmmatchedMarker else _("Unmatched subelement in {}").format( sublocation) )
                        else:
                            logging.warning( _("Unprocessed {} subelement after {} {}:{} in {}").format( subelement.tag, self.BBB, c, v, sublocation ) )
                            self.addPriorityError( 1, c, v, _("Unprocessed {} subelement").format( subelement.tag ) )
                    if subelement.tail and subelement.tail.strip(): noteLine += subelement.tail
                    #noteLine += "\\{}*".format( charStyle )
                    noteLine += "\\{}*".format( noteStyle )
                    if element.tail:
                        noteText = element.tail.strip()
                        noteLine += noteText
                    self.appendToLastLine( noteLine )
                elif element.tag == 'link': # Used to include extra resources
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    # Process the attributes first
                    linkStyle = linkDisplay = linkTarget = None
                    for attrib,value in element.items():
                        if attrib=='style':
                            linkStyle = value
                            assert( linkStyle in ('jmp',) )
                        elif attrib=='display':
                            linkDisplay = value # e.g., "click here"
                        elif attrib=='target':
                            linkTarget = value # e.g., some reference
                        else:
                            logging.warning( _("Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    self.addPriorityError( 3, c, v, _("Unprocessed {} link to {} in {}").format( repr(linkDisplay), repr(linkTarget), location) )
                elif element.tag == 'unmatched': # Used to denote errors in the source text
                    BibleOrgSysGlobals.checkXMLNoText( element, location )
                    BibleOrgSysGlobals.checkXMLNoTail( element, location )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, location )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, location )
                    self.addPriorityError( 2, c, v, _("Unmatched element in {}").format( location) )
                else:
                    logging.warning( _("Unprocessed {} element after {} {}:{} in {}").format( element.tag, self.BBB, c, v, location ) )
                    self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                    for x in range(max(0,len(self)-10),len(self)): print( x, self._rawLines[x] )
                    if BibleOrgSysGlobals.debugFlag: halt
Ejemplo n.º 59
0
            self.assertGreater( len(something), 1 )
        result4 = self.UMs.getTypicalNoteSets( 'xr' )
        self.assertTrue( isinstance( result4, tuple ) )
        self.assertLess( len(result4), len(result1) )
        for something in result4:
            self.assertTrue( isinstance( something , list ) )
            self.assertTrue( something )
            self.assertGreater( len(something), 1 )
        result5 = self.UMs.getTypicalNoteSets( 'pq' )
        self.assertEqual( result5, None )
    #end of test_2200_getTypicalNoteSets

    def test_2210_getMarkerListFromText( self ):
        """
        Test the getMarkerListFromText function with an empty string,
            with text that contains no markers,
            and with text containing one opened and closed bk marker.
        """
        getMarkerList = self.UMs.getMarkerListFromText

        # No markers at all should give an empty list
        self.assertEqual( getMarkerList(''), [] )
        self.assertEqual( getMarkerList('This is just plain text.'), [] )

        # One entry is expected for the opening marker and one for the closing marker
        expectedMarkers = [
            ('bk',5,' ','\\bk ',['bk'],1,'book'),
            ('bk',13,'*','\\bk*',[],None,' is good'),
            ]
        self.assertEqual( getMarkerList('This \\bk book\\bk* is good'), expectedMarkers )
    #end of test_2210_getMarkerListFromText
# end of USFMMarkersTests class


if __name__ == '__main__':
    # Only when run as a script (not when imported):
    #   do the standard BibleOrgSysGlobals set-up and option processing first
    parser = BibleOrgSysGlobals.setup( ProgName, ProgVersion )
    BibleOrgSysGlobals.addStandardOptionsAndProcess( parser )

    # Announce the program name/version at higher verbosity levels
    if BibleOrgSysGlobals.verbosityLevel > 1:
        print( ProgNameVersion )

    # Hand over to unittest which automatically runs all of the above tests
    unittest.main()
# end of USFMMarkersTests.py
Ejemplo n.º 60
0
    def __validateAndExtractHeader( self ):
        """
        Extracts information out of the header record, such as:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. It was finished in 1611, just 85 years after the first translation of the New Testament into English appeared (Tyndale, 1526). The Authorized Version, or King James Version, quickly became the standard for English-speaking Protestants. Its flowing language and prose rhythm has had a profound influence on the literature of the past 300 years.</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Haggai XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>
        """
        if BibleOrgSysGlobals.debugFlag: assert self.header
        location = 'Header'
        BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' )
        BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' )
        BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' )

        # TODO: We probably need to rationalise some of the self.xxx stores
        for element in self.header:
            #print( "header", element.tag )
            if element.tag == 'title':
                sublocation = "title in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.title = element.text
            elif element.tag == 'creator':
                sublocation = "creator in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.creator = element.text
            elif element.tag == 'subject':
                sublocation = "subject in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.subject = element.text
            elif element.tag == 'description':
                sublocation = "description in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.description = element.text
            elif element.tag == 'publisher':
                sublocation = "publisher in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.publisher = element.text
            elif element.tag == 'contributor':
                sublocation = "contributor in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'alj1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jjd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5gk78' )
                if element.text:
                    try: self.contributor = [ self.contributor, element.text ] # Put multiples into a list
                    except AttributeError: self.contributor = element.text # Must be the first (and possibly only) one
            elif element.tag == 'contributors':
                sublocation = "contributors in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.contributors = element.text
            elif element.tag == 'date':
                sublocation = "date in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.date = element.text
            elif element.tag == 'type':
                sublocation = "type in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.documentType = element.text
            elif element.tag == 'format':
                sublocation = "format in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                if BibleOrgSysGlobals.debugFlag: assert element.text == 'Haggai XML Bible Markup Language'
            elif element.tag == 'identifier':
                sublocation = "identifier in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.identifier = element.text
            elif element.tag == 'source':
                sublocation = "source in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.source = element.text
            elif element.tag == 'language':
                sublocation = "language in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if BibleOrgSysGlobals.debugFlag: assert element.text
                self.language = element.text
            elif element.tag == 'coverage':
                sublocation = "coverage in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.coverage = element.text
            elif element.tag == 'rights':
                sublocation = "rights in {}".format( location )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5g78' )
                if element.text: self.rights = element.text
            else: logging.error( "Found unexpected {!r} tag in {}".format( element.tag, location ) )