Python BibleOrganizationalSystem.getBBB Exemples, BibleOrganizationalSystems.BibleOrganizationalSystem.getBBB Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : ZefaniaXMLBible.py Projet : dimleyk/BibleOrgSys

class ZefaniaXMLBible(Bible):
    """
    Class for reading, validating, and converting ZefaniaXMLBible XML.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'XMLBIBLE'
    infoTag = 'INFORMATION'
    bookTag = 'BIBLEBOOK'
    chapterTag = 'CHAPTER'
    captionTag = 'CAPTION'
    verseTag = 'VERS'
    noteTag = 'NOTE'
    styleTag = 'STYLE'
    breakTag = 'BR'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Set up an (as yet unloaded) Zefania XML Bible object.

        sourceFolder and givenName are joined to give the path of the
            XML file. The encoding is only stored here.
        Nothing is parsed by the constructor -- see load().
        """
        Bible.__init__(self)  # The base class must be initialised first
        self.objectTypeString = "Zefania"
        self.objectNameString = "Zefania XML Bible object"

        # Remember where our data comes from
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        filepath = os.path.join(sourceFolder, givenName)
        self.sourceFilepath = filepath

        # These will hold the XML data once it has been loaded
        self.tree = None
        self.header = None

        # Used when a book has to be identified from its name
        self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # An unreadable file is only reported here (nothing is raised)
        if not os.access(filepath, os.R_OK):
            print("ZefaniaXMLBible: File '{}' is unreadable".format(filepath))

        self.name = givenName

    # end of ZefaniaXMLBible.__init__

    def load(self):
        """
        Parse the source XML file and load the header and the book elements.

        The root element is expected to be an XMLBIBLE element. Its
            biblename, status, revision and version attributes (if present
            and non-empty) are stored in self.name, self.status,
            self.revision and self.version.
        An INFORMATION element found as either the first or the last child
            is detached from the tree and handed to the header extraction.
        Every remaining BIBLEBOOK child is handed to the book extraction.

        Problems with the structure are logged rather than raised.
        doPostLoadProcessing() is always called at the end.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))
        self.tree = ElementTree().parse(self.sourceFilepath)
        if Globals.debugFlag:
            assert (len(self.tree)
                    )  # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == ZefaniaXMLBible.treeTag:
            location = "Zefania XML file"
            Globals.checkXMLNoText(self.tree, location, '4f6h')
            Globals.checkXMLNoTail(self.tree, location, '1wk8')

            # These attributes are recognised (so no warning) but not stored
            #   Note: in italian.xml lgid is set to "german"
            ignoredAttributes = (
                ZefaniaXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation',
                "lgid",
                "type",
            )
            name = status = revision = version = None
            for attrib, value in self.tree.items():
                if attrib == "biblename":
                    name = value
                elif attrib == "status":
                    status = value
                elif attrib == "revision":
                    revision = value
                elif attrib == "version":
                    version = value
                elif attrib not in ignoredAttributes:
                    logging.warning(
                        "Unprocessed '{}' attribute ({}) in main element".
                        format(attrib, value))
            if name: self.name = name
            if status: self.status = status
            if revision: self.revision = revision
            if version: self.version = version

            # The information record can be at the START or the END of the file
            #   The length is checked first so that an empty main element
            #   doesn't cause an IndexError (when the debug assert is off).
            numChildren = len(self.tree)
            if numChildren:
                for index in (0, numChildren - 1):
                    if self.tree[index].tag == ZefaniaXMLBible.infoTag:
                        self.header = self.tree[index]
                        self.tree.remove(self.header)
                        self.__validateAndExtractHeader()
                        break  # Only one information record is extracted
            else:
                logging.error("No elements found in {}".format(location))

            # Find the submain (book) containers
            sublocation = "book in " + location
            for element in self.tree:
                if element.tag == ZefaniaXMLBible.bookTag:
                    Globals.checkXMLNoText(element, sublocation, 'g3g5')
                    Globals.checkXMLNoTail(element, sublocation, 'd3f6')
                    self.__validateAndExtractBook(element)
                else:
                    logging.error("Expected to find '{}' but got '{}'".format(
                        ZefaniaXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load '{}' but got '{}'".format(
                ZefaniaXMLBible.treeTag, self.tree.tag))
        self.doPostLoadProcessing()

    # end of ZefaniaXMLBible.load

    def __validateAndExtractHeader(self):
        """
        Extracts information out of the header record (self.header), such as:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized ...</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Zefania XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

        The text of each known field is stored in the attribute of the same
            name on self, except that 'type' is stored in self.documentType
            and 'format' is only checked (not stored).
        Compulsory fields are always stored (and asserted to be non-empty
            in debug mode). Optional fields are only stored if non-empty.
        Unknown tags are logged as errors.
        """
        # An Element with no subelements is falsey,
        #   so we have to test explicitly against None here
        if Globals.debugFlag: assert (self.header is not None)
        location = 'Header'
        Globals.checkXMLNoAttributes(self.header, location, 'j4j6')
        Globals.checkXMLNoText(self.header, location, 'sk4l')
        Globals.checkXMLNoTail(self.header, location, 'a2d4')

        # Maps each known tag to ( attribute name to store into, compulsoryFlag )
        # TODO: We probably need to rationalise some of the self.xxx stores
        knownFields = {
            'title': ('title', True),
            'creator': ('creator', False),
            'subject': ('subject', False),
            'description': ('description', True),
            'publisher': ('publisher', False),
            'contributors': ('contributors', False),
            'date': ('date', True),
            'type': ('documentType', False),
            'format': (None, True),  # Checked below but not stored
            'identifier': ('identifier', True),
            'source': ('source', True),
            'language': ('language', True),
            'coverage': ('coverage', False),
            'rights': ('rights', False),
        }

        for element in self.header:
            if element.tag not in knownFields:
                logging.error("Found unexpected '{}' tag in {}".format(
                    element.tag, location))
                continue

            attributeName, compulsoryFlag = knownFields[element.tag]
            sublocation = "{} in {}".format(element.tag, location)
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            Globals.checkXMLNoAttributes(element, sublocation, 'j3jd')
            Globals.checkXMLNoSubelements(element, sublocation, '5g78')

            if compulsoryFlag:
                if Globals.debugFlag: assert (element.text)
                if attributeName is None:  # It's the format field
                    if Globals.debugFlag:
                        assert (
                            element.text == 'Zefania XML Bible Markup Language')
                else:
                    setattr(self, attributeName, element.text)
            elif element.text:
                setattr(self, attributeName, element.text)

    # end of ZefaniaXMLBible.__validateAndExtractHeader

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book is identified from its bnumber attribute if it has one,
            otherwise from its bname attribute.
        A book which can't be identified is skipped (after logging why).
        An identified book is saved with self.saveBook().
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML book..."))

        # Process the book attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib, value in book.items():
            if attrib == "bnumber":
                bookNumber = value
            elif attrib == "bname":
                bookName = value
            elif attrib == "bsname":
                bookShortName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)
            if not BBB:  # Don't let the book disappear without a trace
                logging.error(
                    _("Zefania load doesn't recognize book name: '{}'").format(
                        bookName))
        else:  # There's nothing to identify the book with
            logging.error(
                _("Zefania load can't find a book number or name (shortName={})"
                  ).format(bookShortName))

        if not BBB:
            return  # Unidentified books are not loaded

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "Zefania XML Bible Book object"
        thisBook.objectTypeString = "Zefania"
        sublocation = "chapter in {}".format(BBB)
        for element in book:
            if element.tag == ZefaniaXMLBible.chapterTag:
                Globals.checkXMLNoText(element, sublocation, 'j3jd')
                Globals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.chapterTag, element.tag))
        if Globals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)

    # end of ZefaniaXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML chapter record
            finding and saving the chapter number and
            finding and saving verse (and caption) elements.

        BBB is the book code (used for the messages).
        thisBook is the book object which the lines are appended to.
        chapter is the CHAPTER XML element.

        A caption (used in Psalms) is saved as verse zero.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML chapter..."))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "cnumber":
                chapterNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            thisBook.appendLine('c', chapterNumber)
        else:
            # The message used to have no placeholder, so the book code never
            #   got displayed (and it named the wrong attribute)
            logging.error(
                "Missing 'cnumber' attribute in chapter element for {}".format(
                    BBB))

        for element in chapter:
            if element.tag == ZefaniaXMLBible.verseTag:
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook,
                                               element)
            elif element.tag == ZefaniaXMLBible.captionTag:  # Used in Psalms
                location = "caption in {} {}".format(BBB, chapterNumber)
                Globals.checkXMLNoTail(element, location, 'k5k8')
                Globals.checkXMLNoSubelements(element, location, 'd3f5')
                # Handle caption attributes
                vRef = None
                for attrib, value in element.items():
                    if attrib == "vref":
                        vRef = value
                        if Globals.debugFlag: assert (vRef == '1')
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in caption element"
                            .format(attrib, value))
                if Globals.debugFlag: assert (vRef)
                vText = element.text
                if vText:  # This is the main text of the caption
                    thisBook.appendLine('v', '0' + ' ' +
                                        vText)  # We save it as verse zero
                else:
                    logging.warning("{} {}:{} has no text".format(
                        BBB, chapterNumber, vRef))
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.verseTag, element.tag))

    # end of ZefaniaXMLBible.__validateAndExtractChapter

    def __getStyledText(self, styleElement, sublocation, checkCode,
                        description):
        """
        Convert the given STYLE XML element to text with character markers.

        Returns the marked-up text of the element,
            followed by the (stripped) tail text of the element if any.

        sublocation, checkCode and description (e.g., "style subelement")
            are only used for the error and warning messages.

        A style which isn't recognised is logged as an error
            and then marked up with \\sc (with the style in square brackets).
        """
        Globals.checkXMLNoSubelements(styleElement, sublocation, checkCode)
        css = idStyle = None
        for attrib, value in styleElement.items():
            if attrib == "css":
                css = value
            elif attrib == "id":
                idStyle = value
            else:
                logging.warning("Unprocessed '{}' attribute ({}) in {}".format(
                    attrib, value, description))
        if Globals.debugFlag: assert (css or idStyle)

        SFM = {
            "font-style:italic": '\\it',
            "font-style:italic;font-weight:bold": '\\bdit',
            "color:#FF0000": '\\em',
            "font-size: x-small; color:#8B8378": '\\add',
        }.get(css)
        if SFM is None and css is None and idStyle == 'cl:divineName':
            SFM = '\\nd'

        # The text is None for an empty element
        sText = (styleElement.text or '').strip()
        if Globals.debugFlag: assert (sText)
        if SFM:
            styledText = SFM + ' ' + sText + SFM + '*'
        else:  # Use sc for unknown styles
            logging.error("Unknown style (css={!r}, id={!r}) in {}".format(
                css, idStyle, sublocation))
            # css can be None here, so fall back to the id for the label
            styledText = '\\sc ' + '[' + (
                css or idStyle or '') + ']' + sText + '\\sc* '
        sTail = styleElement.tail
        if sTail: styledText += sTail.strip()
        return styledText

    # end of ZefaniaXMLBible.__getStyledText

    def __validateAndExtractVerse(self, BBB, chapterNumber, thisBook, verse):
        """
        Check/validate and extract verse data from the given XML verse record
            finding and saving the verse text including
            any styled portions, notes and line breaks inside the verse.

        BBB and chapterNumber are used for the messages.
        thisBook is the book object which the lines are appended to.
        verse is the VERS XML element.

        The text of a note itself is not saved (yet) --
            only the text following the note and any styled portions inside it.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML verse..."))

        location = "verse in {} {}".format(BBB, chapterNumber)
        Globals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "vnumber":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in verse element".format(
                        attrib, value))
        if Globals.debugFlag: assert (verseNumber)
        location = "{}:{}".format(
            location, verseNumber)  # Get a better location description

        # The text is None if a verse starts immediately with a style or note
        #   so make sure that we always have a string to append to
        vText = (verse.text or '').strip()

        # Handle verse subelements (notes and styled portions)
        for subelement in verse:
            if subelement.tag == ZefaniaXMLBible.noteTag:
                sublocation = "note in " + location
                noteType = None
                for attrib, value in subelement.items():
                    if attrib == "type":
                        noteType = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in note subelement"
                            .format(attrib, value))
                if noteType not in (
                        'n-studynote',
                        'x-studynote',
                ):
                    logging.warning("Unexpected {} note type in {}".format(
                        noteType, BBB))
                if Globals.debugFlag: assert (noteType)
                # XXXXXXXXXXXXXXXXXXXXXXXXXX Losing data here (for now)
                #   because subelement.text (the note itself) is not saved
                nTail = subelement.tail
                if nTail:
                    if '\n' in nTail:
                        print(
                            "ZefaniaXMLBible.__validateAndExtractVerse: nTail {} {}:{} '{}'"
                            .format(BBB, chapterNumber, verseNumber, nTail))
                        nTail = nTail.replace('\n', ' ')
                    # NOTE(review): this line gets appended before the 'v' line
                    #   if vText hasn't been saved yet -- confirm that's intended
                    thisBook.appendLine('v~', nTail)
                for subsubelement in subelement:
                    if subsubelement.tag == ZefaniaXMLBible.styleTag:
                        vText += self.__getStyledText(
                            subsubelement, "style in " + sublocation, 'fyt4',
                            "style subsubelement")
                    else:
                        logging.error(
                            "Expected to find {} but got '{}' in {}".format(
                                ZefaniaXMLBible.styleTag, subsubelement.tag,
                                sublocation))

            elif subelement.tag == ZefaniaXMLBible.styleTag:
                vText += self.__getStyledText(subelement,
                                              "style in " + location, 'f5gh',
                                              "style subelement")

            elif subelement.tag == ZefaniaXMLBible.breakTag:
                sublocation = "line break in " + location
                Globals.checkXMLNoText(subelement, sublocation, 'c1d4')
                Globals.checkXMLNoSubelements(subelement, sublocation, 'g4g8')
                art = None
                for attrib, value in subelement.items():
                    if attrib == "art":
                        art = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in line break subelement"
                            .format(attrib, value))
                if Globals.debugFlag: assert (art == 'x-nl')
                if vText:  # Save what we have so far before the break
                    thisBook.appendLine('v', verseNumber + ' ' + vText)
                    vText = ''
                thisBook.appendLine(
                    'm',
                    subelement.tail.strip() if subelement.tail else '')
            else:
                logging.error(
                    "Expected to find NOTE or STYLE but got '{}' in {}".format(
                        subelement.tag, location))

        if vText:  # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print(
                    "ZefaniaXMLBible.__validateAndExtractVerse: vText {} {}:{} '{}'"
                    .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.appendLine('v', verseNumber + ' ' + vText)

    # end of ZefaniaXMLBible.__validateAndExtractVerse

Exemple #2

0

Afficher le fichier

Fichier : OpenSongXMLBible.py Projet : alerque/BibleOrgSys

class OpenSongXMLBible( Bible ):
    """
    Class for reading, validating, and converting OpenSong Bible XML.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'


    def __init__( self, sourceFolder, givenName, encoding='utf-8' ):
        """
        Set up an (as yet unloaded) OpenSong XML Bible object.

        sourceFolder and givenName are joined to give the path of the
            XML file. The encoding is only stored here.
        Nothing is parsed by the constructor -- see load().
        """
        if BibleOrgSysGlobals.debugFlag:
            print( "OpenSongXMLBible( {}, {}, {} )".format( sourceFolder, givenName, encoding ) )
        Bible.__init__( self ) # The base class must be initialised first
        self.objectTypeString = "OpenSong"
        self.objectNameString = "OpenSong XML Bible object"

        # Remember where our data comes from
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        filepath = os.path.join( sourceFolder, givenName )
        self.sourceFilepath = filepath

        self.tree = None # Will hold the XML data once it has been loaded

        # Used when a book has to be identified from its name
        self.genericBOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # An unreadable file is only reported here (nothing is raised)
        if not os.access( filepath, os.R_OK ):
            print( "OpenSongXMLBible: File {!r} is unreadable".format( filepath ) )

        self.name = givenName
    # end of OpenSongXMLBible.__init__


    def load( self ):
        """
        Parse the source XML file and load the book elements.

        The root element is expected to be a 'bible' element.
        Every 'b' child is handed to the book extraction.
            'OT' and 'NT' children are skipped without any message
            and anything else is logged as an error.
        doPostLoadProcessing() is always called at the end.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        self.tree = ElementTree().parse( self.sourceFilepath )
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        root = self.tree
        if root.tag != OpenSongXMLBible.treeTag: # It's not the main (bible) container
            logging.error( "Expected to load {!r} but got {!r}".format( OpenSongXMLBible.treeTag, root.tag ) )
        else:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText( root, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( root, location, '1wk8' )

            # The name (n) and short name (sn) attributes are recognised but not used
            for attrib,value in root.items():
                if attrib not in ( "n", "sn" ):
                    logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )

            # Find the submain (book) containers
            sublocation = "book in " + location
            for element in root:
                tag = element.tag
                if tag == OpenSongXMLBible.bookTag:
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                elif tag not in ( 'OT', 'NT' ):
                    logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.bookTag, tag ) )
        self.doPostLoadProcessing()
    # end of OpenSongXMLBible.load


    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book is identified from its n (name) attribute.
        A book with no name or with an unrecognised name
            is skipped after logging an error.
        Otherwise id, h and mt1 lines are added followed by the chapters
            and then the book is saved with self.saveBook().
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book...") )

        # Process the book attributes first
        bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        if not bookName:
            logging.error( _("OpenSong load can't find a book name") )
            return

        BBB = self.genericBOS.getBBB( bookName ) # Booknames are in English
        if not BBB:
            logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
            return

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"

        # Start the book off with the identification and heading lines
        USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        idText = '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion )
        thisBook.addLine( 'id', idText )
        for marker in ( 'h', 'mt1' ):
            thisBook.addLine( marker, bookName )

        for element in book:
            if element.tag != OpenSongXMLBible.chapterTag:
                logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, element.tag ) )
                continue
            sublocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )

        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
        self.saveBook( thisBook )
    # end of OpenSongXMLBible.__validateAndExtractBook


    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML chapter record
            finding and saving the chapter number and
            finding and saving verse elements.

        BBB is the book code (used for the messages).
        thisBook is the book object which the lines are added to.
        chapter is the 'c' XML element.

        Verse text containing newlines is treated as poetry
            and each line of it is saved as a q1 line.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="n":
                chapterNumber = value
            elif attrib=="VERSES":
                pass # The number of verses is recognised but not used
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            chapterNumber = chapterNumber.replace( 'of Solomon ', '' ) # Fix a mistake in the Chinese_SU module
            thisBook.addLine( 'c', chapterNumber )
        else:
            # The message used to have no placeholder so the book code never got displayed
            logging.error( "Missing 'n' attribute in chapter element for {}".format( BBB ) )

        for element in chapter:
            if element.tag == OpenSongXMLBible.verseTag:
                sublocation = "verse in {} {}".format( BBB, chapterNumber )
                BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'l5ks' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '5f7h' )
                verseNumber = None
                for attrib,value in element.items():
                    if attrib=="n":
                        verseNumber = value
                    elif attrib=="t":
                        pass # The "to" verse number is recognised but not used
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert( verseNumber )
                vText = element.text
                if not vText:
                    logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )
                elif '\n' in vText: # This is how they represent poetry
                    for j, textBit in enumerate( vText.split( '\n' ) ):
                        if j==0: # The verse number goes with the first line
                            thisBook.addLine( 'q1', '' )
                            thisBook.addLine( 'v', verseNumber + ' ' + textBit )
                        else: thisBook.addLine( 'q1', textBit )
                else: # Just one verse line
                    thisBook.addLine( 'v', verseNumber + ' ' + vText )
            else: logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.verseTag, element.tag ) )
    # end of OpenSongXMLBible.__validateAndExtractChapter

Exemple #3

0

Afficher le fichier

Fichier : HaggaiXMLBible.py Projet : alerque/BibleOrgSys

class HaggaiXMLBible( Bible ):
    """
    Class for reading, validating, and converting HaggaiXMLBible XML.
    """
    # Namespace prefix that is prepended to 'noNamespaceSchemaLocation'
    #   when matching the attributes of the root element
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"

    # Element tag names that the methods of this class compare against
    treeTag = 'XMLBIBLE' # The root element of the file
    infoTag = 'INFORMATION' # The header (information) record
    bookTag = 'BIBLEBOOK' # Children of the root element
    chapterTag = 'CHAPTER' # Children of a book element
    captionTag = 'CAPTION' # Children of a book element (saved as 'mt' lines)
    paragraphTag = 'PARAGRAPH' # Children of a chapter element
    verseTag = 'VERSE' # Children of a paragraph element
    noteTag = 'NOTE' # Subelements of a verse
    styleTag = 'STYLE' # Subelements of a verse or of a note
    breakTag = 'BR' # Line-break subelements of a verse


    def __init__( self, sourceFolder, givenName, encoding='utf-8' ):
        """
        Set up the Haggai Bible object (nothing is parsed here).

        Records the folder, filename and encoding that were given,
            works out the full path of the source file, and
            prints a message if that file cannot be read.
        """
        # The base class has to be initialised before our own fields are set
        Bible.__init__( self )
        self.objectTypeString = "Haggai"
        self.objectNameString = "Haggai XML Bible object"

        # Remember what we were given and work out the full filepath
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        self.sourceFilepath = os.path.join( self.sourceFolder, self.givenName )

        # Placeholders for the XML data (filled in by load)
        self.header = None
        self.tree = None

        # Book organisation data (used later to identify books by name)
        self.genericBOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Preliminary readability check -- this only reports the problem
        fileIsReadable = os.access( self.sourceFilepath, os.R_OK )
        if not fileIsReadable:
            print( "HaggaiXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) )

        # Until the file tells us otherwise, the filename is our name
        self.name = self.givenName
    # end of HaggaiXMLBible.__init__


    def load( self ):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath into self.tree, copies the biblename,
            status, revision and version attributes of the root element
            onto this object, detaches and processes an information record
            found as either the first or the last child of the root,
            then processes every book element that remains.

        Structural problems in the file are logged rather than raised.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        self.tree = ElementTree().parse( self.sourceFilepath )
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == HaggaiXMLBible.treeTag:
            location = "Haggai XML file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            # These attributes are recognised (so no warning) but not stored
            #   Note: in italian.xml, lgid is set to "german"
            ignoredAttributes = ( HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation', "lgid", "type" )
            name = status = revision = version = None
            for attrib,value in self.tree.items():
                if attrib in ignoredAttributes:
                    pass
                elif attrib == "biblename":
                    name = value
                elif attrib == "status":
                    status = value
                elif attrib == "revision":
                    revision = value
                elif attrib == "version":
                    version = value
                else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )
            if name: self.name = name
            if status: self.status = status
            if revision: self.revision = revision
            if version: self.version = version

            # The information record can be at the START or at the END of the file
            #   (An empty root element has neither, so we mustn't index into it)
            if len( self.tree ):
                for candidate in ( self.tree[0], self.tree[-1] ):
                    if candidate.tag == HaggaiXMLBible.infoTag:
                        self.header = candidate
                        self.tree.remove( self.header ) # So that only books remain
                        self.__validateAndExtractHeader()
                        break # Only one header record is handled

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == HaggaiXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) )
        else: logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) )
        self.doPostLoadProcessing()
    # end of HaggaiXMLBible.load


    def __validateAndExtractHeader( self ):
        """
        Extracts information out of the header record, such as:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. ...</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Haggai XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

        Every recognised field is checked for having no tail, no attributes
            and no subelements, and its text is then saved on this object.
        Multiple contributor fields are collected into one flat list.
        Unrecognised fields are logged as errors.
        """
        if BibleOrgSysGlobals.debugFlag: assert( self.header )
        location = 'Header'
        BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' )
        BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' )
        BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' )

        # Header tag -> name of the attribute of self that the text is saved in
        # TODO: We probably need to rationalise some of the self.xxx stores
        #   These ones are always saved (and in debug mode, must have text)
        mandatoryFields = { 'title':'title', 'description':'description', 'date':'date',
                            'identifier':'identifier', 'source':'source', 'language':'language' }
        #   These ones are only saved if they actually have text
        optionalFields = { 'creator':'creator', 'subject':'subject', 'publisher':'publisher',
                            'contributors':'contributors', 'type':'documentType',
                            'coverage':'coverage', 'rights':'rights' }
        #   These ones need their own handling below
        specialFields = ( 'contributor', 'format' )

        # Error codes for the NoTail, NoAttributes, and NoSubelements checks
        standardCodes = ( 'al1d', 'j3jd', '5g78' )
        contributorCodes = ( 'alj1d', 'j3jjd', '5gk78' )

        for element in self.header:
            tag, text = element.tag, element.text
            if not ( tag in mandatoryFields or tag in optionalFields or tag in specialFields ):
                logging.error( "Found unexpected {!r} tag in {}".format( tag, location ) )
                continue

            sublocation = "{} in {}".format( tag, location )
            tailCode, attributesCode, subelementsCode = contributorCodes if tag == 'contributor' else standardCodes
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, tailCode )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, attributesCode )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, subelementsCode )

            if tag in mandatoryFields:
                if BibleOrgSysGlobals.debugFlag: assert( text )
                setattr( self, mandatoryFields[tag], text )
            elif tag in optionalFields:
                if text: setattr( self, optionalFields[tag], text )
            elif tag == 'format': # Only checked -- never saved
                if BibleOrgSysGlobals.debugFlag: assert( text )
                if BibleOrgSysGlobals.debugFlag: assert( text == 'Haggai XML Bible Markup Language' )
            elif text: # Must be a contributor
                try: previous = self.contributor
                except AttributeError: self.contributor = text # Must be the first (and possibly only) one
                else:
                    # Put multiples into ONE list (don't nest a new list for every extra one)
                    if isinstance( previous, list ): previous.append( text )
                    else: self.contributor = [ previous, text ]
    # end of HaggaiXMLBible.__validateAndExtractHeader


    def __validateAndExtractBook( self, book ):
        """
        Validate one XML book record and extract its captions and chapters.

        The book is identified from its bnumber attribute or, if there
            isn't one, from its bname attribute. A book that can't be
            identified is not processed or saved.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book...") )

        # Gather the attributes that we know about (and warn about the rest)
        found = { "bnumber":None, "bname":None, "bsname":None }
        for attrib,value in book.items():
            if attrib in found: found[attrib] = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
        bookNumber, bookName, bookShortName = found["bnumber"], found["bname"], found["bsname"]

        # Work out which book this is -- the number has priority over the name
        BBB = None
        if bookNumber:
            try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB( bookName )

        if not BBB: return # Nothing can be done with an unidentified book

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "Haggai XML Bible Book object"
        thisBook.objectTypeString = "Haggai"

        for child in book:
            childTag = child.tag
            if childTag == HaggaiXMLBible.captionTag: # Saved as a main title line
                sublocation = "caption in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoAttributes( child, sublocation, 'jhl6' )
                BibleOrgSysGlobals.checkXMLNoSubelements( child, sublocation, 'jk21' )
                BibleOrgSysGlobals.checkXMLNoTail( child, sublocation, 'kjh6' )
                thisBook.addLine( 'mt', child.text )
                continue
            if childTag == HaggaiXMLBible.chapterTag:
                sublocation = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( child, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( child, sublocation, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, child )
                continue
            logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, childTag ) )

        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
        self.saveBook( thisBook )
    # end of HaggaiXMLBible.__validateAndExtractBook


    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML chapter record
            finding and saving the chapter number and
            finding and processing paragraph elements.

        BBB is the book code (used in messages),
            thisBook is the book object that lines are added to, and
            chapter is the XML chapter element.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter...") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="cnumber":
                chapterNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            #print( BBB, 'c', chapterNumber )
            thisBook.addLine( 'c', chapterNumber )
        else: # The message names the attribute that we actually look for, and the book
            logging.error( "Missing 'cnumber' attribute in chapter element for {}".format( BBB ) )

        for element in chapter:
            if element.tag == HaggaiXMLBible.paragraphTag:
                self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element )
            # The next two branches only match tags with 'disabled' appended
            elif element.tag == HaggaiXMLBible.verseTag+'disabled':
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element )
            elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms
                location = "caption in {} {}".format( BBB, chapterNumber )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' )
                # Handle caption attributes
                vRef = None
                for attrib,value in element.items():
                    if attrib=="vref":
                        vRef = value
                        if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' )
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert( vRef )
                vText = element.text
                if not vText:
                    logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
                if vText: # This is the main text of the caption
                    thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero
            else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) )
    # end of HaggaiXMLBible.__validateAndExtractChapter


    def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ):
        """
        Validate one XML paragraph record and extract the verses inside it.

        A 'p' line is added to thisBook and then every verse subelement
            is passed on to the verse handler.
        Unexpected subelements are logged as errors.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph...") )

        paragraphLocation = "paragraph in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoAttributes( paragraph, paragraphLocation, 'brgw3' )
        BibleOrgSysGlobals.checkXMLNoText( paragraph, paragraphLocation, 'brgw3' )
        BibleOrgSysGlobals.checkXMLNoTail( paragraph, paragraphLocation, 'brgw3' )
        thisBook.addLine( 'p', '' )

        # Step through the subelements (which should be verses)
        for child in paragraph:
            childTag = child.tag
            if childTag == HaggaiXMLBible.verseTag:
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, child )
                continue
            if childTag != HaggaiXMLBible.captionTag+'disabled':
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, childTag ) )
                continue

            # If we get here it's a caption (Used in Psalms)
            captionLocation = "caption in {} {}".format( BBB, chapterNumber )
            BibleOrgSysGlobals.checkXMLNoTail( child, captionLocation, 'k5k8' )
            BibleOrgSysGlobals.checkXMLNoSubelements( child, captionLocation, 'd3f5' )

            # The only caption attribute that we know about is vref
            vRef = None
            for attrib,value in child.items():
                if attrib != "vref":
                    logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                    continue
                vRef = value
                if BibleOrgSysGlobals.debugFlag: assert( vRef == '1' )
            if BibleOrgSysGlobals.debugFlag: assert( vRef )

            captionText = child.text
            if captionText: thisBook.addLine( 'v', '0' + ' ' + captionText ) # We save it as verse zero
            else: logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
    # end of HaggaiXMLBible.__validateAndExtractParagraph


    def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ):
        """
        Check/validate and extract verse data from the given XML verse record
            finding and saving verse elements.

        The verse text is accumulated in vText:
            notes are appended as \\f ... \\f* fields,
            styled portions are wrapped in the matching character marker, and
            a line break saves the text so far as a 'v' line
                and then adds an 'm' line containing the tail of the break.
        Whatever text remains at the end is saved as a 'v' line.

        BBB and chapterNumber are used in messages,
            thisBook is the book object that lines are added to, and
            verse is the XML verse element.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse...") )

        location = "verse in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' )

        # Handle verse attributes
        verseNumber = toVerseNumber = None # toVerseNumber is never set or read again in this method
        for attrib,value in verse.items():
            if attrib=="vnumber":
                verseNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert( verseNumber )
        location = "{}:{}".format( location, verseNumber ) # Get a better location description
        #thisBook.addLine( 'v', verseNumber )
        vText = '' if verse.text is None else verse.text
        if vText: vText = vText.strip()
        #if not vText: # This happens if a verse starts immediately with a style or note
            #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

        # Handle verse subelements (notes and styled portions)
        for subelement in verse:
            if subelement.tag == HaggaiXMLBible.noteTag:
                sublocation = "note in " + location
                noteType = None
                for attrib,value in subelement.items():
                    if attrib=="type": noteType = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if noteType and noteType not in ('variant',):
                    logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
                nText, nTail = subelement.text, subelement.tail
                #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
                # The note becomes a footnote field (with the note type, if any, as the keyword)
                # NOTE(review): nText isn't checked, so a note without text is formatted as 'None'
                vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText )
                if nTail:
                    if '\n' in nTail:
                        print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
                        nTail = nTail.replace( '\n', ' ' )
                    vText += nTail
                # Styled portions inside the note are appended AFTER the closed footnote field and its tail
                for subsubelement in subelement:
                    if subsubelement.tag == HaggaiXMLBible.styleTag:
                        subsublocation = "style in " + sublocation
                        BibleOrgSysGlobals.checkXMLNoSubelements( subsubelement, subsublocation, 'fyt4' )
                        fs = css = idStyle = None # css and idStyle always stay None (their handling is commented out)
                        for attrib,value in subsubelement.items():
                            if attrib=='fs': fs = value
                            #elif attrib=="css": css = value
                            #elif attrib=="id": idStyle = value
                            else: logging.warning( "Unprocessed {!r} attribute ({}) in style subsubelement".format( attrib, value ) )
                        if BibleOrgSysGlobals.debugFlag: assert( fs or css or idStyle )
                        SFM = None
                        if fs == 'italic': SFM = '\\it'
                        elif fs == 'super': SFM = '\\bdit'
                        elif fs == 'emphasis': SFM = '\\em'
                        # NOTE(review): halt isn't defined in this method -- presumably a deliberate NameError
                        #   to stop on an unknown style -- TODO confirm
                        else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
                        #if css == "font-style:italic": SFM = '\\it'
                        #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
                        #elif css == "color:#FF0000": SFM = '\\em'
                        #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
                        #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
                        #else: print( "css is", css, "idStyle is", idStyle ); halt
                        # NOTE(review): .strip() fails with AttributeError if the style element has no text
                        sText, sTail = subsubelement.text.strip(), subsubelement.tail
                        if BibleOrgSysGlobals.debugFlag: assert( sText )
                        if SFM: vText += SFM+' ' + sText + SFM+'*'
                        else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
                        if sTail: vText += sTail.strip()
                    else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) )

            elif subelement.tag == HaggaiXMLBible.styleTag:
                sublocation = "style in " + location
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' )
                fs = css = idStyle = None # css and idStyle always stay None (their handling is commented out)
                for attrib,value in subelement.items():
                    if attrib=="fs": fs = value
                    #elif attrib=="css": css = value
                    #elif attrib=="id": idStyle = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert( fs )
                SFM = None
                # NOTE(review): unlike the styles inside notes above, 'italic' isn't accepted here -- confirm that's intended
                if fs == 'super': SFM = '\\bdit'
                elif fs == 'emphasis': SFM = '\\em'
                # NOTE(review): halt isn't defined in this method (see the note-style branch above) -- TODO confirm
                else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
                #if css == "font-style:italic": SFM = '\\it'
                #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
                #elif css == "color:#FF0000": SFM = '\\em'
                #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
                #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
                #else: print( "css is", css, "idStyle is", idStyle ); halt
                # NOTE(review): .strip() fails with AttributeError if the style element has no text
                sText, sTail = subelement.text.strip(), subelement.tail
                if BibleOrgSysGlobals.debugFlag: assert( sText )
                #print( BBB, chapterNumber, sublocation )
                if SFM: vText += SFM+' ' + sText + SFM+'*'
                else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
                if sTail: vText += sTail.strip()

            elif subelement.tag == HaggaiXMLBible.breakTag:
                sublocation = "line break in " + location
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
                art = None
                for attrib,value in subelement.items():
                    if attrib=="art":
                        art = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert( art == 'x-nl' )
                #print( BBB, chapterNumber, verseNumber )
                #assert( vText )
                # Save the text so far as the verse line, then start an 'm' line with the text after the break
                # NOTE(review): verseNumber is cleared once it has been saved, so if more text is
                #   accumulated for this verse, a later verseNumber + ' ' would raise TypeError -- TODO confirm
                if vText:
                    thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
                    vText = ''
                thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' )
                #bTail = subelement.tail
                #if bTail: vText = bTail.strip()
            else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

        if vText: # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
                vText = vText.replace( '\n', ' ' )
            thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None

Exemple #4

0

Afficher le fichier

class OpenSongXMLBible(Bible):
    """
    Class for reading, validating, and converting OpenSong Bible XML.

    The class attributes below are the XML element names that the loading
        code compares against element.tag while walking the parsed tree:
        the root element, then book, chapter and verse elements nested
        inside each other in that order.
    """
    # Tag of the root element (checked by load() before anything else)
    treeTag = 'bible'
    # Tag of a book element directly under the root
    bookTag = 'b'
    # Tag of a chapter element inside a book element
    chapterTag = 'c'
    # Tag of a verse element inside a chapter element
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Prepare an empty OpenSong XML Bible object (nothing is parsed here).

        sourceFolder and givenName are joined to form the path of the XML
            file. encoding is only stored on the object by this constructor.
        An unreadable file is reported on stdout but does not stop the
            object from being constructed.
        """
        if Globals.debugFlag:  # Trace the call before any other work is done
            traceMessage = "OpenSongXMLBible( {}, {}, {} )".format(
                sourceFolder, givenName, encoding)
            print(traceMessage)

        # The base class has to be initialised before we add our own fields
        Bible.__init__(self)
        self.objectNameString = "OpenSong XML Bible object"
        self.objectTypeString = "OpenSong"

        # Remember where the source file lives
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        # The parsed XML is filled in later by load()
        self.tree = None

        # Lookup system used to turn book names into book codes when loading
        self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Preliminary readability check -- only a report, never an exception
        canRead = os.access(self.sourceFilepath, os.R_OK)
        if not canRead:
            print("OpenSongXMLBible: File '{}' is unreadable".format(
                self.sourceFilepath))

        # The Bible is named after the file name that we were given
        self.name = self.givenName

    # end of OpenSongXMLBible.__init__

    def load(self):
        """
        Parse the source XML file and pass every book element on for
            validation and extraction.

        The parsed root element is kept in self.tree. Structural problems
            (wrong root tag, unexpected child elements, unknown attributes)
            are reported through logging and nothing is returned.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))
        self.tree = ElementTree().parse(self.sourceFilepath)
        root = self.tree
        if Globals.debugFlag:
            assert (len(root))  # Fail here if we didn't load anything at all

        # Guard clause: the root has to be the main (bible) container
        if root.tag != OpenSongXMLBible.treeTag:
            logging.error("Expected to load '{}' but got '{}'".format(
                OpenSongXMLBible.treeTag, root.tag))
            return

        location = "XML file"
        Globals.checkXMLNoText(root, location, '4f6h')
        Globals.checkXMLNoTail(root, location, '1wk8')

        # 'n' (name) and 'sn' (short name) are recognised so that they don't
        #   trigger a warning, but their values are not used any further here
        recognisedAttributes = {}
        for attrib, value in root.items():
            if attrib in ("n", "sn"):
                recognisedAttributes[attrib] = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in main element".
                    format(attrib, value))

        # Walk the submain (book) containers
        silentlyIgnoredTags = ('OT', 'NT')  # Accepted, but nothing is done with them
        for child in root:
            childTag = child.tag
            if childTag == OpenSongXMLBible.bookTag:
                sublocation = "book in " + location
                Globals.checkXMLNoText(child, sublocation, 'g3g5')
                Globals.checkXMLNoTail(child, sublocation, 'd3f6')
                self.__validateAndExtractBook(child)
            elif childTag not in silentlyIgnoredTags:
                logging.error("Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.bookTag, childTag))

    # end of OpenSongXMLBible.load

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        book is the XML book element. Its 'n' attribute holds the book name,
            which is converted to a book code (BBB) with
            self.genericBOS.getBBB(). Every chapter subelement is handed to
            __validateAndExtractChapter() and the finished book is stored
            with self.saveBook().

        Nothing is returned. A missing book name, or one that can't be
            converted to a book code, is logged as an error and the book is
            skipped. Each of those errors is only logged in its own failure
            case, never for a book that loaded successfully.
        """

        if Globals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the book attributes first
        bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))

        # Failure case 1: there was no (or an empty) book name at all
        if not bookName:
            logging.error(_("OpenSong load can't find a book name"))
            return

        # Failure case 2: there's a name, but we can't map it to a book code
        BBB = self.genericBOS.getBBB(bookName)
        if not BBB:
            logging.error(
                _("OpenSong load doesn't recognize book name: '{}'").format(
                    bookName))
            return

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"
        for element in book:
            if element.tag == OpenSongXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                Globals.checkXMLNoText(element, sublocation, 'j3jd')
                Globals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error(
                    "Expected to find '{}' but got '{}'".format(
                        OpenSongXMLBible.chapterTag, element.tag))
        if Globals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)

    # end of OpenSongXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        BBB is the book code (only used in messages here), thisBook is the
            book object that receives the extracted lines through its
            appendLine() method, and chapter is the XML chapter element.

        Nothing is returned. Structural problems (missing chapter or verse
            number, verse without text, unexpected elements or attributes)
            are reported through logging and processing continues.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML chapter..."))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                pass  # Recognised so that it isn't warned about, but the value isn't used
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.appendLine('c', chapterNumber)
        else:
            # The message needs a real placeholder so that the book code is reported
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find '{}' but got '{}'".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            Globals.checkXMLNoTail(element, sublocation, 'l5ks')
            Globals.checkXMLNoSubelements(element, sublocation, '5f7h')
            verseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib == "t":
                    # Recognised but not used -- presumably the end of a
                    #   verse range (TODO confirm against the OpenSong format)
                    pass
                else:
                    logging.warning(
                        "Unprocessed '{}' attribute ({}) in verse element".
                        format(attrib, value))
            if Globals.debugFlag: assert (verseNumber)

            vText = element.text
            if not vText:
                logging.warning("{} {}:{} has no text".format(
                    BBB, chapterNumber, verseNumber))
                continue
            if verseNumber is None:
                # Without this guard the string concatenation below would
                #   fail with a TypeError when not running in debug mode
                logging.error(
                    "Missing 'n' attribute in verse element for {} {}".format(
                        BBB, chapterNumber))
                continue

            # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:  # This is how they represent poetry
                firstBit, *remainingBits = vText.split('\n')
                thisBook.appendLine('q1', '')
                thisBook.appendLine('v', verseNumber + ' ' + firstBit)
                for textBit in remainingBits:
                    thisBook.appendLine('q1', textBit)
            else:  # Just one verse line
                thisBook.appendLine('v', verseNumber + ' ' + vText)