예제 #1
0
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.

        Parameters:
            sourceFolder: the folder which is scanned for the USFX XML file.
            givenName: optional name for this Bible. If it's empty, the name is
                derived from the folder name (minus any trailing '_usfx').
            encoding: remembered in self.encoding (not otherwise used here).

        self.sourceFilename and self.sourceFilepath are only set if exactly one
        file in the folder looks like a USFX file -- otherwise they stay as None.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder {!r} is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
                    continue
                if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        # Peek into each candidate file to see if it looks like USFX
        numFound = 0
        lastFilenameFound = None
        for thisFilename in sorted( foundFiles ):
            firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                continue
            # NOTE(review): only the first line is searched for the usfx tag -- confirm that's intended
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1: # Only accept an unambiguous result
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif BibleOrgSysGlobals.verbosityLevel > 2:
            # We can only get here if the folder did contain some candidate files
            #   (This used to test an undefined 'looksHopeful' name and so raised a NameError)
            print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.

        Parses self.sourceFilepath and, if the root element is 'usfx', saves
        the schema location in self.schemaLocation, the language code in
        self.languageCode, and passes each 'book' element to loadBook().

        If that gives no books, the source folder is rescanned for files whose
        first line is a backslash id line, and each one is loaded as a
        USFXXMLBibleBook.

        Returns None. If there's no file to load, or it can't be parsed, a
        critical error is logged and we return without doing the post-load
        processing.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        if not self.sourceFilepath: # __init__ leaves this as None unless exactly one USFX file was found
            logging.critical( "USFXXMLBible.load: no USFX file to load from {!r}".format( self.sourceFolder ) )
            return

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError as parseError:
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: {!r}.".format( self.sourceFilepath, parseError ) )
            return
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (usfx) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None # Only used for the warning message below
            for element in self.tree:
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'cff3' )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in {!r}".format( self.sourceFolder ) )
            # Rescan the folder here, because the file list made by __init__ was local to that function
            #   (This loop used to reference it anyway and so raised a NameError)
            for thisFilename in sorted( os.listdir( self.sourceFolder ) ):
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                if not os.path.isfile( thisPath ): continue
                # Look for BBB in the ID line (which should be the first line in the file)
                try:
                    with open( thisPath, encoding=self.encoding ) as possibleUSXFile: # Automatically closes the file when done
                        firstLine = possibleUSXFile.readline() # We only look at the first line
                except UnicodeDecodeError: continue # Can't be a text file in our encoding
                if not firstLine.startswith( '\\id ' ): continue
                USXId = firstLine[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Have possible USFX ID {!r}".format( USXId ) )
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( USXId )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "BBB is {!r}".format( BBB ) )
                UBB = USFXXMLBibleBook( self, BBB )
                UBB.load( self.sourceFolder, thisFilename, self.encoding )
                UBB.validateMarkers()
                print( UBB )
                self.books[BBB] = UBB
                # Make up our book name dictionaries while we're at it
                assumedBookNames = UBB.getAssumedBookNames()
                for assumedBookName in assumedBookNames:
                    self.BBBToNameDict[BBB] = assumedBookName
                    assumedBookNameLower = assumedBookName.lower()
                    self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                    self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                    if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.

        Creates a new BibleBook (kept in self.thisBook), converts each child
        of the given element into marker/text lines on that book, and then
        saves the book with self.saveBook().

        Parameters:
            bookElement: the XML 'book' element. Its 'id' attribute is the
                USFM book code which gets converted to the BBB code.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        # Check this book element (the root element was already checked by the loader)
        BibleOrgSysGlobals.checkXMLNoText( bookElement, mainLocation, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( bookElement, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            # mainLocation already includes the BBB, so only C and V get added here
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'vsg3' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'jsa0' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # Either part might be missing, so don't just concatenate them
                self.thisBook.addLine( 'ide', ' '.join( part for part in (charset, ideText) if part ) )
            elif element.tag == 'h':
                hText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'dj35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'hs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.addLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ss13' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'toc' + (level if level else ''), clean(tocText) ) # Don't crash if the level is missing
            elif element.tag == 'c':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.addLine( marker, '' if sText is None else sText ) # Same None handling as for paragraphs
                for subelement in element:
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                lastVerse = self.loadParagraph( element, location, BBB, C )
                if lastVerse is not None: V = lastVerse # A paragraph with no verse milestones doesn't change the current verse
            elif element.tag == 'b':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'nd04' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.addLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'od01' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'gd92' )
                idField = None
                for attrib,value in element.items():
                    if attrib == 'id': idField = value
                    else:
                        logging.warning( _("dv35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if idField: # Only complain if there really was an id that we didn't use
                    if text is None: text = idField
                    else:
                        logging.warning( _("dve4 Unprocessed idField ({}) in {}").format( idField, location ) )
                if text is None:
                    logging.critical( "Why is {} empty at {}".format( marker, location ) )
                assert( text is not None )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 've': # What's this in Psalms: <c id="4" /><ve /><d>For the Chief Musician; on stringed instruments. A Psalm of David.</d>
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'kds3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ks29' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'kj24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js91' )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring 've' field", BBB, C, V )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, BBB, C ):
        """
        Load the paragraph (p or q) container from the XML data file.

        Adds a line to self.thisBook for the paragraph itself (the marker comes
        from the tag, or from the 'sfm' attribute, plus any 'level'), and then
        processes each of the sub-elements (verse milestones, notes, figures,
        tables, character formatting, etc.).

        Parameters:
            paragraphElement: the XML paragraph element.
            paragraphLocation: description of where we are (for log messages).
            BBB, C: book code and chapter (passed on to the footnote and
                character formatting loaders, and used in log messages).

        Returns the id of the last verse milestone processed in this paragraph,
            or None if the paragraph didn't contain any.
        """
        V = None
        pTag, pText = paragraphElement.tag, clean(paragraphElement.text)
        BibleOrgSysGlobals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm': sfm = value
            elif attrib == 'level': level = value
            elif attrib == 'style': style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        if sfm:
            assert( pTag == 'p' )
            pTag = sfm
        if level: # Not only for q -- could also be mt, etc.
            pTag += level
        if style:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring {!r} style".format( style ) )

        self.thisBook.addLine( pTag, '' if pText is None else pText )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'crc2' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'lct3' )
                V = None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V ) # i.e., not None and not empty
                self.thisBook.addLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'lsc3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'mfy4' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'bd24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                self.loadFootnote( element, location, BBB, C, V )
            elif element.tag == 'x':
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location, BBB, C, V )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                # An empty element has no text -- don't put the word 'None' into the line
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, '' if text is None else text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'kdf0' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'lkj1' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # As above, a missing text must not end up as the word 'None'
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, '' if text is None else text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if BibleOrgSysGlobals.debugFlag: halt
            elif element.tag == 'milestone': # e.g., <milestone sfm="pb" attribute=""/> (pb = explicit page break)
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'jzx2' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ms23' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'dw24' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("mcd2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('pb',): print( "milestone sfm got", repr(sfm) )
                self.thisBook.addLine( sfm, '' )
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location, BBB, C, V ):
        """
        Append a character formatting element to the last line of the current book
            as an opening marker, the text, and then the closing marker
            (followed by any tail text).

        Footnotes inside the element are passed to loadFootnote(). Any other
        sub-elements are not processed (only logged).

        Parameters:
            element: the XML element -- its tag is used as the marker.
            location: description of where we are (for log messages).
            BBB, C, V: current reference (passed on to loadFootnote).
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'sd12' )
        # There's no text if the element starts with a sub-element -- don't put the word 'None' into the line
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, '' if text is None else text ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'f':
                self.loadFootnote( subelement, sublocation, BBB, C, V )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        Append a figure to the last line of the current book as a fig field.

        The texts of the sub-elements are joined with vertical bars in the
        fixed order: description, catalog, size, location, copyright, caption,
        reference (with an empty string for any which are missing).
        Any tail text of the figure element is appended after the closing marker.

        Parameters:
            element: the XML 'fig' element.
            location: description of where we are (for log messages).
        """
        fieldOrder = ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',)
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'ff36' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'cf35' )

        # Start with every field empty, then fill in the ones which we find
        fieldValues = dict.fromkeys( fieldOrder, '' )
        for child in element:
            childLocation = child.tag + " of " + location
            childText = clean( child.text )
            assert( child.tag in fieldValues )
            fieldValues[child.tag] = childText if childText is not None else ''
            BibleOrgSysGlobals.checkXMLNoTail( child, childLocation, 'jkf5' )
            BibleOrgSysGlobals.checkXMLNoAttributes( child, childLocation, 'ld18' )
            BibleOrgSysGlobals.checkXMLNoSubelements( child, childLocation, 'hb46' )

        joinedFields = '|'.join( fieldValues[fieldName] for fieldName in fieldOrder )
        trailingText = clean( element.tail )
        suffix = (' ' + trailingText) if trailingText else ''
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( joinedFields, suffix ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location, C=None, V=None ):
        """
        Load a table element into the current book.

        Each 'tr' sub-element adds a new 'tr' line, and then each of its cells
        (th, thr, tc, or tcr, plus any 'level' attribute) is appended to that
        line as a marker followed by the cell text.

        Parameters:
            element: the XML 'table' element.
            location: description of where we are (for log messages).
            C, V: optional chapter and verse which are only used in the
                message about unprocessed elements. (This message used to
                reference these names without them being defined here.)
        """
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'kg92' )
        BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ka92' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                self.thisBook.addLine( 'tr', '' )
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'sg32' )
                BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    BibleOrgSysGlobals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else: # Report the cell's own location (not the location of the whole table)
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                    marker = tag + (level if level else '')
                    # An empty cell has no text -- don't put the word 'None' into the line
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, '' if text is None else text ) )
            else:
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, self.thisBook.BBB, C, V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location, BBB, C, V ):
        """
        Handles footnote fields, including xt field.
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadFootnote", repr(caller), repr(text), repr(tail), repr(marker), repr(fText), repr(fTail) )
            #if BibleOrgSysGlobals.verbosityLevel > 0 and marker not in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',):
                #print( "USFX.loadFootnote found", repr(caller), repr(marker), repr(fText), repr(fTail) )
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq','xt',) )
            if marker=='ref':
                assert( fText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText ) )
                if marker=='xt' or marker[0]=='f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2 == 'ref':
                            #print( sub2location )
                            if fText2:
                                #print( 'ft2', marker2, repr(fText2), repr(fTail2), sub2location )
                                self.thisBook.appendToLastLine( fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value # OSIS style reference, e.g., '1SA.27.8'
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                #print( 'tg', marker2, repr(target) )
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else:
                                if debuggingThisModule: halt
                        elif marker2 in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                            self.loadCharacterFormatting( sub2element, sub2location, BBB, C, V )
                        else:
                            print( 'Ignored marker2', repr(marker2), BBB, C, V )
                            if debuggingThisModule: halt
                        if fTail2: self.thisBook.appendToLastLine( fTail2 )
                elif marker in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                    self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                else:
                    print( 'Ignored marker', repr(marker), BBB, C, V )
                    halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Append a USFX cross-reference element to the last line as inline USFM.

        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

        Parameters:
            element: the <x> XML element.
            location: description of where the element was found (used in log messages only).

        Nothing is returned -- all output goes through self.thisBook.appendToLastLine().

        NOTE(review): any text placed directly inside <x> (before its first child) is not
            emitted here, unlike the parallel footnote handling -- confirm that is intended.
        NOTE(review): the bare 'halt' statements are kept from the original; 'halt' is not
            defined in this method, so presumably they stop processing with a NameError.
        """
        tail = clean( element.tail )

        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )

        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','xo','xt',) )

            if marker == 'ref': # A reference directly inside the <x> element
                assert( xText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                # An element like <xt><ref .../></xt> has no leading text of its own,
                #   so don't let a missing text be written out as the word "None"
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText if xText else '' ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2 == 'ref':
                            if xText2: # Displayed text goes first, then the encoded target
                                self.thisBook.appendToLastLine( xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target: self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                        if xTail2: self.thisBook.appendToLastLine( xTail2 )
                else: halt

            if xTail: # Text following the subelement also closes the subelement's marker
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
예제 #2
0
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.

        Remembers the parameters, works out a name for this Bible, and then scans
        sourceFolder for USFX XML files. If exactly one candidate file is found,
        self.sourceFilename and self.sourceFilepath are set; otherwise they stay None.

        Parameters:
            sourceFolder: the folder expected to contain the USFX XML file.
            givenName: optional name for this Bible (defaults to one derived from the folder name).
            encoding: text encoding, remembered in self.encoding.
        """
        # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder '{}' is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                if any( somethingUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
                    continue
                somethingUpperExt = os.path.splitext( somethingUpper )[1]
                if somethingUpperExt[1:] not in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        # Peek into each remaining file to see if it looks like a USFX XML file
        numFound, lastFilenameFound = 0, None
        for thisFilename in sorted( foundFiles ):
            firstLines = Globals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if Globals.verbosityLevel > 2: print( "USFXB (unexpected) first line was '{}' in {}".format( firstLines, thisFilename ) )
                continue
            # NOTE(review): only the first line is searched for the <usfx> root element
            #   -- confirm that the files always have it on the same line as the XML declaration
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if Globals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1: # Only accept an unambiguous result
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        # We only get here if the folder contained some files, so it did look hopeful
        #   (This branch used to test a 'looksHopeful' name that is never set in this method.)
        elif Globals.verbosityLevel > 2: print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.

        Parses self.sourceFilepath, and if the root element is <usfx>, processes its
        attributes and then each child element: <languageCode> is saved in
        self.languageCode and each <book> is passed to self.loadBook().
        Anything else is logged as unprocessed.

        If the XML can't be parsed, a critical error is logged and we return early
        (without calling self.doPostLoadProcessing()).

        NOTE(review): the fallback code that runs when self.books is empty uses
            'foundFiles', which is not assigned anywhere in this method, and looks for
            USFM-style '\\id ' lines -- it looks like a left-over from another loader.
        """
        if Globals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

                                #if Globals.verbosityLevel > 2: print( _("  It seems we have {}...").format( BBB ) )
                        #self.thisBook = BibleBook( self, BBB )
                        #self.thisBook.objectNameString = "OSIS XML Bible Book object"
                        #self.thisBook.objectTypeString = "OSIS"
                        #self.haveBook = True

        # ElementTree.parse() returns the root element, so self.tree is that element (not the tree wrapper)
        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: '{}'.".format( self.sourceFilepath, errorString ) )
            return
        if Globals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (usfx) container
        # NOTE(review): a root element with any other tag is silently skipped
        if self.tree.tag == 'usfx':
            location = "USFX file"
            Globals.checkXMLNoText( self.tree, location, '4f6h' )
            Globals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                #print( "attrib", repr(attrib), repr(value) )
                if attrib.endswith("SchemaLocation"): # Matches the attribute whatever prefix comes before it
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None # Only used in the warning message below (never updated in this loop)
            for element in self.tree:
                #print( "element", repr(element.tag) )
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    Globals.checkXMLNoTail( element, sublocation, 'cff3' )
                    Globals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                    ##Globals.checkXMLNoSubelements( element, sublocation, '54f2' )
                    #Globals.checkXMLNoTail( element, sublocation, 'hd35' )
                    ## Process the attributes
                    #idField = bookStyle = None
                    #for attrib,value in element.items():
                        #if attrib=='id' or attrib=='code':
                            #idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            ##if idField != BBB:
                            ##    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        #elif attrib=='style':
                            #bookStyle = value
                        #else:
                            #logging.warning( _("gfw2 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
                    #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if Globals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in '{}'".format( self.sourceFolder ) )
            # NOTE(review): 'foundFiles' is not assigned in this method (it's a local variable in __init__)
            #   -- unless it's defined at module level, this line can't work
            for thisFilename in foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if Globals.verbosityLevel > 2: print( "Have possible USFX ID '{}'".format( USXId ) )
                            BBB = Globals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if Globals.verbosityLevel > 2: print( "BBB is '{}'".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    # NOTE(review): USFXXMLBibleBook is not defined in the part of the file reviewed -- confirm it exists
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName # Each name overwrites the previous one, so the last one wins
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.

        Converts one USFX <book> element into a BibleBook (held in self.thisBook while
        it's being loaded) by appending a line for each recognised child element, and
        then passes the finished book to self.saveBook().

        Parameters:
            bookElement: the <book> XML element -- its 'id' attribute is taken as the USFM book code.

        Unrecognised child elements are logged as critical errors (and processing
        halts there if Globals.debugFlag is set).
        """
        if Globals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        # Check the book element itself
        #   (This used to re-check self.tree, i.e., the root element which load() has already checked.)
        Globals.checkXMLNoText( bookElement, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( bookElement, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0' # Only used for the location strings and log messages in this method
        for element in bookElement:
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                # Don't fail with a TypeError if there's no charset attribute
                self.thisBook.appendLine( 'ide', ' '.join( part for part in (charset,ideText) if part ) )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 'toc'
                if level: marker += level
                else: # Don't fail with a TypeError -- just report it
                    logging.warning( _("Missing level attribute for toc in {}").format( location ) )
                self.thisBook.appendLine( marker, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0' # New chapter so restart the verse number
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element: # Section headings can contain notes, etc.
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, C ) # Returns the last verse number that it found (or None)
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, C ):
        """
        Load the paragraph (p or q) container from the XML data file.

        Goes through the child elements of the paragraph and adds them to
        self.thisBook: a <v> milestone starts a new 'v' line (with the text that
        follows it), and notes, figures, tables, character formatting, etc. are
        added to the current line by the other load methods.

        Parameters:
            paragraphElement: the paragraph XML element.
            paragraphLocation: description of where the element was found (used in log messages only).
            C: the current chapter number (only used in a log message here).

        Returns the id of the last <v> element found in the paragraph,
            or None if the paragraph didn't contain any.

        NOTE(review): the paragraph's own text (pText) and its sfm/level/style attributes
            are collected but never used below, and no line is added for the paragraph
            marker itself -- confirm whether that's intended.
        """
        #if Globals.verbosityLevel > 3:
            #print( _("USFXXMLBible.loadParagraph: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        V = None
        pText = paragraphElement.text # Not used again in this method
        Globals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None # The paragraph values of these are not used again in this method
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm':
                sfm = value
            elif attrib == 'level':
                level = value
            elif attrib == 'style':
                style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            #print( "element", repr(element.tag) )
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                Globals.checkXMLNoText( element, location, 'crc2' )
                Globals.checkXMLNoSubelements( element, location, 'lct3' )
                lastV, V = V, None # lastV is not used again in this method
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.appendLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                Globals.checkXMLNoText( element, location, 'lsc3' )
                Globals.checkXMLNoTail( element, location, 'mfy4' )
                Globals.checkXMLNoAttributes( element, location, 'bd24' )
                Globals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", paragraphLocation, C, V, repr(element.text) )
                self.loadFootnote( element, location )
            elif element.tag == 'x':
                #print( "USFX.loadParagraph Found xref at", paragraphLocation, C, V, repr(element.text) )
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None # Now holds the marker for this character style (replaces the paragraph value from above)
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) ) # Report any markers which we haven't seen before
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'kdf0' )
                Globals.checkXMLNoAttributes( element, location, 'lkj1' )
                Globals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                #if target not in ('w','ior',): print( "ref sfm got", repr(sfm) )
                # The target goes inside the marker pair and the displayed text follows the closing marker
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
                #print( "Saved", '\\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if Globals.debugFlag: halt
            elif element.tag == 'milestone':
                print( "What is loadParagraph milestone?" )
                if Globals.debugFlag: halt
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location, BBB=None, C=None, V=None ):
        """
        Append a character formatting element (e.g., add, nd, wj, it, bd) to the
        last line as an inline USFM marker pair.

        Parameters:
            element: the character formatting XML element -- its tag is used as the marker.
            location: description of where the element was found (used in log messages only).
            BBB, C, V: optional book code, chapter, and verse -- only used in the
                log message (BBB defaults to self.thisBook.BBB).

        Footnotes are the only subelements handled. Anything else is logged and then
            processing stops at the 'halt' (kept from the original).
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        Globals.checkXMLNoAttributes( element, location, 'sd12' )
        # There might be no text before the first subelement
        #   so make sure that we don't write out the word "None"
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'f':
                self.loadFootnote( subelement, sublocation )
            else:
                # C and V used to be undefined names here, so the NameError prevented this message from being logged
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag),
                                    self.thisBook.BBB if BBB is None else BBB,
                                    '?' if C is None else C, '?' if V is None else V, location ) )
                halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        Append a figure element to the last line as a USFM \\fig ... \\fig* entry.

        The text of each recognised subelement is collected and then all seven
        fields are joined with '|' in a fixed order (missing ones are left empty).
        Any text following the figure element is added after the closing marker.

        Parameters:
            element: the <fig> XML element.
            location: description of where the element was found (used in log messages only).
        """
        Globals.checkXMLNoText( element, location, 'ff36' )
        Globals.checkXMLNoAttributes( element, location, 'cf35' )

        fieldOrder = ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',)
        fieldValues = dict.fromkeys( fieldOrder, '' ) # Every field starts off empty
        for child in element:
            childLocation = child.tag + " of " + location
            fieldName = child.tag
            fieldText = clean( child.text )
            assert( fieldName in fieldValues )
            fieldValues[fieldName] = fieldText if fieldText is not None else ''
            Globals.checkXMLNoTail( child, childLocation, 'jkf5' )
            Globals.checkXMLNoAttributes( child, childLocation, 'ld18' )
            Globals.checkXMLNoSubelements( child, childLocation, 'hb46' )

        figString = '|'.join( fieldValues[fieldName] for fieldName in fieldOrder )
        figTail = clean( element.tail )
        trailer = (' '+figTail) if figTail else ''
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( figString, trailer ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location, BBB=None, C=None, V=None ):
        """
        Load a table element: each <tr> row starts a new 'tr' line, and each of its
        cells (th, thr, tc, tcr) is appended to that line as a marker (with the
        level attribute if there is one) followed by the cell text.

        Parameters:
            element: the <table> XML element.
            location: description of where the element was found (used in log messages only).
            BBB, C, V: optional book code, chapter, and verse -- only used in the
                log message (BBB defaults to self.thisBook.BBB).
        """
        Globals.checkXMLNoText( element, location, 'kg92' )
        Globals.checkXMLNoTail( element, location, 'ka92' )
        Globals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                self.thisBook.appendLine( 'tr', '' )
                Globals.checkXMLNoText( subelement, sublocation, 'sg32' )
                Globals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                Globals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    Globals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    Globals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else: # Report the location of the cell itself (not of the whole table)
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                    marker = tag + (level if level else '')
                    # An empty cell has no text so make sure that we don't write out the word "None"
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text if text else '' ) )
            else:
                # C and V used to be undefined names here, so we got a NameError instead of this warning
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag,
                                    self.thisBook.BBB if BBB is None else BBB,
                                    '?' if C is None else C, '?' if V is None else V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location ):
        """
        Convert a USFX footnote element into footnote text on the current line.

        The output is appended to the last line of self.thisBook as an opening
            'f' marker with the caller (and any leading footnote text),
            then one marker per subelement,
            then the closing 'f*' marker followed by any tail text.

        Has to handle: 'ref' subelements (which need a 'tgt' attribute and text)
            and footnote fields such as 'fr' and 'ft', which may themselves
            contain text-less 'ref' subelements.

        Parameters:
            element: the XML footnote element to process.
            location: description of where the element was found
                (only used in the check and log messages).

        NOTE(review): 'halt' is not defined anywhere in the visible code, so
            reaching one of those statements raises NameError -- presumably a
            deliberate stop on input that is not handled yet. Confirm.
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )

        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if Globals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',) )
            if marker=='ref':
                # A direct reference: emitted as a self-closed marker holding the target, then the display text
                assert( fText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt # Deliberate stop: ref without a tgt attribute
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                # A field with no leading text (e.g., one that starts with a nested ref)
                #   must not put the literal text 'None' into the line
                self.thisBook.appendToLastLine( ' \\{}{}'.format( marker, (' '+fText) if fText else '' ) )
                if marker[0] == 'f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        # NOTE(review): any tail text after the nested element is not emitted -- confirm that is intended
                        marker2, fText2 = sub2element.tag, clean(sub2element.text)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2=='ref':
                            assert( not fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt # Deliberate stop: nested ref without a tgt attribute
                        else: halt # Deliberate stop: only ref is handled inside a footnote field
                else: halt # Deliberate stop: markers not starting with f (e.g., it, bd, rq) are not handled
            if fTail:
                # Text following the subelement: close the marker first, then add the text
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Convert a USFX cross-reference element into cross-reference text on the current line.

        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>

        The output is appended to the last line of self.thisBook as an opening
            'x' marker with the caller, then one marker per subelement,
            then the closing 'x*' marker followed by any tail text.
        Fields such as 'xo' and 'xt' may themselves contain text-less 'ref' subelements.

        Parameters:
            element: the XML cross-reference element to process.
            location: description of where the element was found
                (only used in the check and log messages).

        NOTE(review): 'halt' is not defined anywhere in the visible code, so
            reaching one of those statements raises NameError -- presumably a
            deliberate stop on input that is not handled yet. Confirm.
        NOTE(review): text placed directly inside the element (before the
            first subelement) is not emitted -- confirm that is intended.
        """
        tail = clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )

        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            if Globals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                # A direct reference: emitted as a self-closed marker holding the target, then the display text
                assert( xText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt # Deliberate stop: ref without a tgt attribute
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                # A field with no leading text (e.g., one that starts with a nested ref)
                #   must not put the literal text 'None' into the line
                self.thisBook.appendToLastLine( ' \\{}{}'.format( marker, (' '+xText) if xText else '' ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        # NOTE(review): any tail text after the nested element is not emitted -- confirm that is intended
                        marker2, xText2 = sub2element.tag, clean(sub2element.text)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            assert( not xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt # Deliberate stop: nested ref without a tgt attribute
                        else: halt # Deliberate stop: only ref is handled inside a cross-reference field
                else: halt # Deliberate stop: markers not starting with x are not handled
            if xTail:
                # Text following the subelement: close the marker first, then add the text
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )