Python BibleOrgSysGlobals.checkXMLNoTail 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: BibleOrgSys

클래스/타입: BibleOrgSysGlobals

메소드/함수: checkXMLNoTail

hotexamples.com에서의 예제들: 16

Python BibleOrgSysGlobals.checkXMLNoTail - 16개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 BibleOrgSys.BibleOrgSysGlobals.checkXMLNoTail에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

introduceProgram(30)

checkXMLNoTail(16)

setup(14)

checkXMLNoText(12)

checkXMLNoAttributes(11)

checkXMLNoSubelements(10)

preloadCommonData(10)

peekIntoFile(7)

addStandardOptionsAndProcess(4)

makeSafeFilename(4)

removeAccents(1)

printUnicodeInfo(1)

getFlattenedXML(1)

makeSafeXML(1)

applyStringAdjustments(1)

findHomeFolderpath(1)

fileCompare(1)

closedown(1)

unpickleObject(1)

예제 #1

파일 보기

파일: BibleBookOrdersConverter.py 프로젝트: janfri/BibleOrgSys

    def loadSystems( self, XMLFolder=None ):
        """
        Load and pre-process the specified book order systems.

        Scans XMLFolder for files whose extension is .XML and whose name starts
            with BIBLEBOOKORDER_ (both matched case-insensitively). The rest of
            the filename is used as the book order system code.
        For each such file, the XML is parsed, the header element is detached
            from the tree, and the version/date/title are copied out of the
            header's work element into self._XMLSystems[code].

        Parameters:
            XMLFolder: folder to scan. If None, the 'BookOrders/' folder under
                BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH is used.

        Returns self.

        If self._XMLSystems is already populated, nothing is reloaded, and an
            error is logged if a different folder was requested this time.
        """
        if not self._XMLSystems: # Only ever do this once
            if XMLFolder is None: XMLFolder = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( 'BookOrders/' ) # Relative to module, not cwd
            self.__XMLFolder = XMLFolder
            vPrint( 'Info', debuggingThisModule, _("Loading book order systems from {}…").format( self.__XMLFolder ) )
            filenamePrefix = "BIBLEBOOKORDER_"
            for filename in os.listdir( self.__XMLFolder ):
                filepart, extension = os.path.splitext( filename )
                if extension.upper() != '.XML' or not filepart.upper().startswith(filenamePrefix):
                    continue # Not a book order system file -- nothing to load or validate

                bookOrderSystemCode = filepart[len(filenamePrefix):]
                vPrint( 'Verbose', debuggingThisModule, _("  Loading{} book order system from {}…").format( bookOrderSystemCode, filename ) )
                thisSystem = self._XMLSystems[bookOrderSystemCode] = {}
                tree = thisSystem['tree'] = ElementTree().parse( os.path.join( self.__XMLFolder, filename ) )
                # Explicit length test: truth-testing an Element is deprecated
                assert len(tree) > 0 # Fail here if we didn't load anything at all

                # Check and remove the header element
                if tree.tag == self.XMLTreeTag:
                    header = tree[0]
                    if header.tag == self.headerTag:
                        thisSystem['header'] = header
                        tree.remove( header )
                        BibleOrgSysGlobals.checkXMLNoText( header, 'header' )
                        BibleOrgSysGlobals.checkXMLNoTail( header, 'header' )
                        BibleOrgSysGlobals.checkXMLNoAttributes( header, 'header' )
                        if len(header)>1:
                            logging.info( _("Unexpected elements in header") )
                        elif len(header)==0:
                            logging.info( _("Missing work element in header") )
                        else:
                            work = header[0]
                            BibleOrgSysGlobals.checkXMLNoText( work, "work in header" )
                            BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" )
                            BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" )
                            if work.tag == "work":
                                thisSystem['version'] = work.find('version').text
                                thisSystem['date'] = work.find('date').text
                                thisSystem['title'] = work.find('title').text
                            else:
                                logging.warning( _("Missing work element in header") )
                    else:
                        logging.warning( _("Missing header element (looking for {!r} tag)").format( self.headerTag ) )
                else:
                    logging.error( _("Expected to load {!r} but got {!r}").format( self.XMLTreeTag, tree.tag ) )

                # Whatever is left directly under the root is counted as the books
                bookCount = len( tree )
                vPrint( 'Info', debuggingThisModule, _("    Loaded {} books for {}").format( bookCount, bookOrderSystemCode ) )
                logging.info( _("    Loaded {} books for {}").format( bookCount, bookOrderSystemCode ) )

                # Validate only the system that was just loaded from this file
                #   (previously this also ran for non-matching directory entries,
                #   using a stale or not-yet-defined bookOrderSystemCode)
                if BibleOrgSysGlobals.strictCheckingFlag:
                    self.__validateSystem( tree, bookOrderSystemCode )
        else: # The data must have been already loaded
            # Report the folder that the caller asked for (the one being ignored)
            if XMLFolder is not None and XMLFolder!=self.__XMLFolder: logging.error( _("Bible book order systems are already loaded -- your different folder of {!r} was ignored").format( XMLFolder ) )
        return self

예제 #2

파일 보기

파일: VerseViewXMLBible.py 프로젝트: janfri/BibleOrgSys

    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        Parameters:
            book: the XML book element. Its 'n' attribute is taken as the book
                name; any other attribute is logged as unprocessed.
            bookNumber: the book's number, looked up with
                getBBBFromReferenceNumber() to double-check the book code.

        The book code (BBB) is first derived from the book name (retrying with
            accents removed). If it disagrees with the code derived from
            bookNumber, the code from bookNumber wins.
        Each chapter subelement is handed to __validateAndExtractChapter() and
            the resulting book is stashed. If no book code can be determined,
            nothing is extracted.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        # Only attempt a name lookup if we actually have a name: a book element
        #   without an 'n' attribute must not pass None on to removeAccents()
        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
            if BBB is None:  # Try again without any accents
                adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
                if adjustedBookName != bookName:
                    BBB = self.genericBOS.getBBBFromText(adjustedBookName)
        BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
            bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                vPrint(
                    'Quiet', debuggingThisModule,
                    "Assuming that book {} {!r} is {} (not {})".format(
                        bookNumber, bookName, BBB2, BBB))
            BBB = BBB2

        if BBB:
            vPrint('Info', debuggingThisModule,
                   _("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "vb26 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.chapterTag, element.tag))
            vPrint('Info', debuggingThisModule,
                   "  Saving {} into results…".format(BBB))
            self.stashBook(thisBook)

예제 #3

파일 보기

파일: BibleReferencesLinksConverter.py 프로젝트: janfri/BibleOrgSys

    def __load(self, XMLFileOrFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        Parameters:
            XMLFileOrFilepath: passed straight to ElementTree().parse().

        On success the parsed root is left in self._XMLTree (minus its header),
            the header element is saved in self.XMLheader, and the version,
            date and title found in the header's work element are saved in
            self.PROGRAM_VERSION, self.dateString and self.titleString.
        Structural problems are logged rather than raised.
        """
        assert XMLFileOrFilepath
        self.__XMLFileOrFilepath = XMLFileOrFilepath
        assert self._XMLTree is None or len(
            self._XMLTree) == 0  # Make sure we're not doing this twice

        vPrint(
            'Info', debuggingThisModule,
            _("Loading BibleReferencesLinks XML file from {!r}…").format(
                self.__XMLFileOrFilepath))
        tree = self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath)
        # Explicit length test: truth-testing an Element is deprecated
        assert len(tree) > 0  # Fail here if we didn't load anything at all

        if tree.tag == self._treeTag:
            header = tree[0]
            if header.tag == self._headerTag:
                self.XMLheader = header
                tree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, 'header')
                BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
                BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        work, "work in header")
                    if work.tag == "work":
                        self.PROGRAM_VERSION = work.find('version').text
                        self.dateString = work.find('date').text
                        self.titleString = work.find('title').text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                # Translate the template first and only then fill it in
                #   (formatting inside _() would look up the already-filled
                #   string in the message catalogue, which can never match)
                logging.warning(
                    _("Missing header element (looking for {!r} tag)").format(
                        self._headerTag))
            # This check applies to the first child whether or not it was a header
            if header.tail is not None and header.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after header").format(
                        header.tail))
        else:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self._treeTag, tree.tag))

예제 #4

파일 보기

    def validateEntries(self, segment) -> None:
        """
        Validate the container element holding the Strongs lexicon entries.

        The container itself must have no text, no tail and no attributes.
        self.StrongsEntries is reset to an empty dict, then every 'entry'
            subelement is passed to self.validateEntry(). Subelements with any
            other tag are silently skipped.
        """
        containerTag = segment.tag
        if BibleOrgSysGlobals.debugFlag:
            assert containerTag == 'entries'

        # The container's tag doubles as the location description for the checks
        BibleOrgSysGlobals.checkXMLNoText(segment, containerTag, "kw99")
        BibleOrgSysGlobals.checkXMLNoTail(segment, containerTag, "ls90")
        BibleOrgSysGlobals.checkXMLNoAttributes(segment, containerTag, "hsj2")

        self.StrongsEntries = {}
        for child in segment:
            if child.tag != 'entry':
                continue
            self.validateEntry(child)

예제 #5

파일 보기

    def load(self):
        """
        Parse the XML file at self.sourceFilepath and extract its books.

        The parsed root is kept in self.XMLTree. If the root has the expected
            OpenSongXMLBible.treeTag, each book subelement is handed to
            __validateAndExtractBook(); 'OT' and 'NT' subelements are accepted
            but ignored, and anything else is logged as an error.
        self.doPostLoadProcessing() is always called at the end, even if the
            root tag was not the expected one.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            assert self.XMLTree  # Fail here if we didn't load anything at all

        root = self.XMLTree
        if root.tag != OpenSongXMLBible.treeTag:
            # Not the main (bible) container that we were expecting
            logging.error("Expected to load {!r} but got {!r}".format(
                OpenSongXMLBible.treeTag, root.tag))
        else:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText(root, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoTail(root, location, '1wk8')

            # The 'n' and 'sn' attributes are recognised (so they don't get
            #   warned about) but their values are not stored anywhere
            name = shortName = None
            for attributeName, attributeValue in root.items():
                if attributeName == "n":
                    name = attributeValue
                elif attributeName == "sn":
                    shortName = attributeValue
                else:
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in main element".
                        format(attributeName, attributeValue))

            # Find the submain (book) containers
            for child in root:
                childTag = child.tag
                if childTag == OpenSongXMLBible.bookTag:
                    sublocation = "book in {}".format(location)
                    BibleOrgSysGlobals.checkXMLNoText(child, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(child, sublocation,
                                                      'd3f6')
                    self.__validateAndExtractBook(child)
                elif childTag in ('OT', 'NT'):
                    continue  # Testament markers are accepted but ignored
                else:
                    logging.error("Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.bookTag, childTag))

        self.doPostLoadProcessing()

예제 #6

파일 보기

    def _validate( self ):
        """
        Check/validate the loaded data.

        Every element directly under self._XMLTree is expected to have the tag
            self._mainElementTag, with no text, tail or subelements.
        For each such element, problems with its attributes are logged:
            * compulsory attributes (self._compulsoryAttributes) that are
                missing (error) or blank (warning -- except for "type")
            * optional attributes (self._optionalAttributes) that are blank
            * attributes that are in neither of those two lists
            * values repeated within one of self._uniqueAttributes
                (except for "reference_name")
        Elements with any other tag are logged as unexpected.

        Nothing is returned and nothing is raised for invalid data -- the
            results are only reported through the logging module.
        """
        assert self._XMLTree

        # One set of already-seen values per unique attribute (sets give
        #   constant-time membership tests however many records there are)
        seenValues = { attributeName: set() for attributeName in self._uniqueAttributes }

        for j,element in enumerate(self._XMLTree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( "Compulsory {!r} attribute is missing from {} element in record {}".format( attributeName, element.tag, j ) )
                    elif not attributeValue and attributeName!="type": # Present but empty (a missing attribute was already reported above)
                        logging.warning( "Compulsory {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None and not attributeValue:
                        logging.warning( "Optional {!r} attribute is blank on {} element in record {}".format( attributeName, element.tag, j ) )

                # Check for unexpected additional attributes on this main element
                for attributeName, attributeValue in element.items():
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning( "Additional {!r} attribute ({!r}) found on {} element in record {}".format( attributeName, attributeValue, element.tag, j ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None and attributeName!="reference_name":
                        if attributeValue in seenValues[attributeName]:
                            logging.error( "Found {!r} data repeated in {!r} field on {} element in record {}".format( attributeValue, attributeName, element.tag, j ) )
                        seenValues[attributeName].add( attributeValue )
            else:
                logging.warning( "Unexpected element: {} in record {}".format( element.tag, j ) )

예제 #7

파일 보기

파일: VerseViewXMLBible.py 프로젝트: janfri/BibleOrgSys

    def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook,
                                  verse):
        """
        Check/validate and extract verse data from the given XML book record
            finding and saving verse elements.

        Parameters:
            BBB: book code, used here only in location/diagnostic messages.
            chapterNumber: used here only in location/diagnostic messages.
            thisBook: the book object that the verse line is added to
                (via its addLine() method).
            verse: the XML verse element. Its 'n' attribute is taken as the
                verse number; any other attribute is logged as unprocessed.

        The verse text (stripped, with any newlines replaced by spaces) is
            added to thisBook as a 'v' line of the form "<number> <text>".
        Nothing is added if the verse has no text. If the verse has text but
            no 'n' attribute, an error is logged and the verse is skipped.
        """

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag: assert verseNumber

        # Verse subelements (notes, styled portions, line breaks) are not
        #   handled here: the element was checked above for having no
        #   subelements, so only its own text is used
        vText = (verse.text or '').strip()
        if not vText:
            return  # No text for this verse so there's nothing to save

        if verseNumber is None:
            # Without the 'n' attribute we can't build the verse line (and
            #   concatenating None below would raise a TypeError)
            logging.error(
                "Missing 'n' (verse number) attribute in {}".format(location))
            return

        # This is the main text of the verse (follows the verse milestone)
        if '\n' in vText:
            vPrint(
                'Quiet', debuggingThisModule,
                "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                .format(BBB, chapterNumber, verseNumber, vText))
            vText = vText.replace('\n', ' ')
        thisBook.addLine('v', verseNumber + ' ' + vText)

예제 #8

파일 보기

파일: VerseViewXMLBible.py 프로젝트: janfri/BibleOrgSys

    def load(self):
        """
        Load a single source XML file and load book elements.

        The parsed root is kept in self.XMLTree. If the root has the expected
            VerseViewXMLBible.treeTag, its subelements are processed in order:
            * the revision, title, font and copyright elements have their text
                saved in self.suppliedMetadata['VerseView'] (under the keys
                'Revision', 'Title', 'Font' and 'Copyright')
            * the filename and sizefactor elements are checked but not saved
            * each book element is handed to __validateAndExtractBook()
                along with its 1-based position among the book elements
            * anything else is logged as an error.
        Finally the supplied metadata is applied and
            self.doPostLoadProcessing() is called (even if the root tag was
            not the expected one).
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Explicit length test: truth-testing an Element is deprecated
            assert len(self.XMLTree) > 0  # Fail here if we didn't load anything at all

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        metadata = self.suppliedMetadata['VerseView'] = {}

        location = "VerseView XML file"

        def checkInfoElement(infoElement, description):
            """
            Check that a simple info element (one that should only contain
                text) has no attributes, no subelements and no tail.
            """
            sublocation = description + " in " + location
            BibleOrgSysGlobals.checkXMLNoAttributes(
                infoElement, sublocation, 'jk86')
            BibleOrgSysGlobals.checkXMLNoSubelements(
                infoElement, sublocation, 'hjk7')
            BibleOrgSysGlobals.checkXMLNoTail(infoElement, sublocation, 'bh09')

        # end of checkInfoElement

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location,
                                                    'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                if element.tag == VerseViewXMLBible.filenameTag:
                    checkInfoElement(element, "filename")  # Not saved
                elif element.tag == VerseViewXMLBible.revisionTag:
                    checkInfoElement(element, "revision")
                    metadata['Revision'] = element.text
                elif element.tag == VerseViewXMLBible.titleTag:
                    checkInfoElement(element, "title")
                    metadata['Title'] = element.text
                elif element.tag == VerseViewXMLBible.fontTag:
                    checkInfoElement(element, "font")
                    metadata['Font'] = element.text
                elif element.tag == VerseViewXMLBible.copyrightTag:
                    checkInfoElement(element, "copyright")
                    metadata['Copyright'] = element.text
                elif element.tag == VerseViewXMLBible.sizefactorTag:
                    checkInfoElement(element, "sizefactor")  # Not saved
                    if BibleOrgSysGlobals.debugFlag: assert element.text == '1'
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error(
                        "xk15 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all supposed to be compulsory, but use get() so that a
            #   file lacking one of them (or with an unexpected root element)
            #   shows None here rather than raising KeyError in a debug print
            for metadataKey in ('Revision', 'Title', 'Font', 'Copyright'):
                vPrint(
                    'Quiet', debuggingThisModule, "{} is {!r}".format(
                        metadataKey, metadata.get(metadataKey)))

        self.applySuppliedMetadata(
            'VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

예제 #9

파일 보기

파일: BibleReferencesLinksConverter.py 프로젝트: janfri/BibleOrgSys

    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLTree if you prefer.)

        The work is done in three passes:
            1. Walk self._XMLTree collecting raw (sourceReference, sourceComponent, links) tuples.
            2. Sanity-check every reference against SimpleVerseKey and then parse it
                with FlexibleVersesKey.
            3. Build a dictionary keyed by each included verse, holding both the forward
                entries and the reversed (target back to source) entries.

        The results are stored in self.__DataList and self.__DataDict. If a previous call
            already produced a non-empty list, the stored containers are returned unchanged.

        Returns a 2-tuple: (list of processed entries, dict of verse key -> list of entries).

        Raises TypeError if a non-'Verse' reference is accepted by SimpleVerseKey
                (or if parsing a source reference raises it),
            and ValueError if a linkType has no known reverse link type.
        """
        def checkReferenceAgainstComponent(component, reference):
            """
            Check that the reference behaves as its declared component type says it should.

            A 'Verse' reference is simply handed to SimpleVerseKey (any exception propagates).
            Any other component is expected to make SimpleVerseKey raise TypeError;
                if it is accepted instead, the problem is logged and TypeError is raised.
            """
            if component == 'Verse':
                SimpleVerseKey(reference)
                return
            try:
                SimpleVerseKey(reference, ignoreParseErrors=True)
            except TypeError:
                return  # This should happen coz it should fail the SVK
            logging.error("{} {!r} failed!".format(component, reference))
            raise TypeError(
                "{} {!r} was unexpectedly accepted by SimpleVerseKey".format(
                    component, reference))

        # end of checkReferenceAgainstComponent

        assert self._XMLTree
        if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        # Pass 1: collect the raw strings from the XML tree
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLTree:
            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in ('Section', 'Verses', 'Verse')

            BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
            BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference,
                                                    'kd21')
            BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

            actualRawLinksList = []
            for subelement in element:
                if subelement.tag in ('sourceReference', 'sourceComponent'):
                    # The text of these was already fetched above
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'ls12')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sourceReference, 'ks02')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'sqw1')

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText(subelement,
                                                      sourceReference, 'haw9')
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'hs19')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'jsd9')

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in ('Section', 'Verses', 'Verse')
                    linkType = subelement.find('linkType').text
                    assert linkType in (
                        'TSK',
                        'QuotedOTReference',
                        'AlludedOTReference',
                        'PossibleOTReference',
                    )

                    actualRawLinksList.append(
                        (targetReference, targetComponent, linkType))
                    actualLinkCount += 1
                # NOTE: subelements with any other tag are silently skipped here

            rawRefLinkList.append(
                (sourceReference, sourceComponent, actualRawLinksList))

        vPrint(
            'Normal', debuggingThisModule,
            f"  {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)"
        )

        # Pass 2: check and parse all the references
        myRefLinkList = []
        actualLinkCount = 0
        BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')  # Not used below -- kept in case construction has side effects (TODO confirm)

        for j, (sourceReference, sourceComponent,
                actualRawLinksList) in enumerate(rawRefLinkList):
            # Just do some testing first
            checkReferenceAgainstComponent(sourceComponent, sourceReference)
            # Now do the actual parsing (a TypeError here is NOT caught, unlike for targets below)
            parsedSourceReference = FlexibleVersesKey(sourceReference)
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, j, sourceComponent,
                       sourceReference, parsedSourceReference)

            actualLinksList = []
            for targetReference, targetComponent, linkType in actualRawLinksList:
                # Just do some testing first
                checkReferenceAgainstComponent(targetComponent, targetReference)
                # Now do the actual parsing
                try:
                    parsedTargetReference = FlexibleVersesKey(targetReference)
                except TypeError:
                    logging.error(
                        "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                        .format(targetReference))
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    vPrint('Quiet', debuggingThisModule, ' ', targetComponent,
                           targetReference, parsedTargetReference)

                actualLinksList.append((
                    targetReference,
                    targetComponent,
                    parsedTargetReference,
                    linkType,
                ))
                actualLinkCount += 1

            myRefLinkList.append((
                sourceReference,
                sourceComponent,
                parsedSourceReference,
                actualLinksList,
            ))

        vPrint(
            'Normal', debuggingThisModule,
            "  {:,} links processed (with {:,} actual link entries)".format(
                len(rawRefLinkList), actualLinkCount))
        self.__DataList = myRefLinkList

        # Pass 3: put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        myRefLinkDict = {}
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            for verseRef in parsedSourceReference.getIncludedVerses():
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append((
                    sourceReference,
                    sourceComponent,
                    parsedSourceReference,
                    actualLinksList,
                ))
        # NOTE: This (and totalLinks below) counts dictionary keys, i.e., distinct verses
        originalLinks = len(myRefLinkDict)
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} verse links added to dictionary (includes filling out spans)"
            .format(originalLinks))

        # Add the reversed links to the same dictionary (by verse key)
        reverseLinkTypes = {
            'TSK': 'TSKQuoted',
            'QuotedOTReference': 'OTReferenceQuoted',
            'AlludedOTReference': 'OTReferenceAlluded',
            'PossibleOTReference': 'OTReferencePossible',
        }
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
                if parsedTargetReference is None:
                    continue  # Parsing failed (and was logged) in pass 2
                if linkType not in reverseLinkTypes:  # Have a new linkType!
                    raise ValueError(
                        "Unknown linkType {!r} for link from {!r} to {!r}".format(
                            linkType, sourceReference, targetReference))
                reverseLinkType = reverseLinkTypes[linkType]
                for verseRef in parsedTargetReference.getIncludedVerses():
                    assert isinstance(verseRef, SimpleVerseKey)
                    # A fresh tuple and inner list per verse, so entries never share a mutable list
                    myRefLinkDict.setdefault(verseRef, []).append(
                        (targetReference, targetComponent,
                         parsedTargetReference, [
                             (sourceReference, sourceComponent,
                              parsedSourceReference, reverseLinkType)
                         ]))
        totalLinks = len(myRefLinkDict)
        reverseLinks = totalLinks - originalLinks
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} reverse links added to dictionary to give {:,} total".
            format(reverseLinks, totalLinks))

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        mostReferences = totalReferences = 0
        mostVerseRef = None  # Stays None if the dictionary is empty
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len(entryList)
            if numRefs > mostReferences:
                mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        if mostVerseRef is not None:
            vPrint(
                'Quiet', debuggingThisModule,
                "  {:,} maximum links for any one reference ({})".format(
                    mostReferences, mostVerseRef.getShortText()))
        vPrint('Quiet', debuggingThisModule,
               "  {:,} total links for all references".format(totalReferences))

        return self.__DataList, self.__DataDict

예제 #10

파일 보기

파일: BibleReferencesLinksConverter.py 프로젝트: janfri/BibleOrgSys

    def __validate(self):
        """
        Check/validate the loaded data.

        Walks every child of self._XMLTree. For each main element (tag equal to
            self._mainElementTag) it checks the compulsory, optional, unexpected and
            must-be-unique attributes and subelements. Any other tag gets an
            "Unexpected element" warning. Stray tail text is reported for every
            child and for the tree root.

        Problems are reported through the logging module only -- nothing is returned
            and nothing is raised for bad data (other than the initial assert).
        """
        assert self._XMLTree

        # One set of already-seen values for each field that must be unique
        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = set()
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = set()

        for j, element in enumerate(self._XMLTree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    elif not attributeValue:  # Present but empty (a missing one is only reported once, above)
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None and not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        seenValues = uniqueDict["Attribute_" + attributeName]
                        if attributeValue in seenValues:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        seenValues.add(attributeValue)

                # Get the sourceComponent to use as a record ID
                # If it's missing, ID is None here and the problem is reported by
                #   the compulsory/optional element checks below rather than crashing
                # NOTE(review): sourceReference might identify a record better -- confirm
                sourceComponentElement = element.find("sourceComponent")
                ID = None if sourceComponentElement is None else sourceComponentElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))
                    else:
                        sublocation = foundElement.tag + " in " + element.tag
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement, sublocation)
                        # Subelements are deliberately not checked here
                        if not foundElement.text:
                            logging.warning(
                                _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        sublocation = foundElement.tag + " in " + element.tag
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement, sublocation)
                        BibleOrgSysGlobals.checkXMLNoSubelements(
                            foundElement, sublocation)
                        if not foundElement.text:
                            logging.warning(
                                _("Optional {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text, ID, j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        text = foundElement.text
                        seenTexts = uniqueDict["Element_" + elementName]
                        if text in seenTexts:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, ID, j))
                        seenTexts.add(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))
            # This also covers unexpected elements (which skip the checks above)
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))
        if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element").format(
                    self._XMLTree.tail, self._XMLTree.tag))

예제 #11

파일 보기

파일: BibleBookOrdersConverter.py 프로젝트: pkabore/BibleOrgSys

    def __validateSystem(self, bookOrderTree, systemName):
        """
        Do a semi-automatic check of the XML file validity.

        bookOrderTree is the (non-empty) element tree for one book order system
            and systemName is only used to label some of the log messages.

        For each main element (tag equal to self.mainElementTag) this checks that the
            "id" attributes count up from 1, that the element text is not repeated,
            and the compulsory, optional, unexpected and must-be-unique attributes
            and subelements. Any other tag gets an "Unexpected element" warning.

        Problems are reported through the logging module only -- nothing is returned.
        """
        assert bookOrderTree

        # One set of already-seen values for each field that must be unique
        uniqueDict = {}
        for elementName in self.uniqueElements:
            uniqueDict["Element_" + elementName] = set()
        for attributeName in self.uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = set()
        # Kept separate from uniqueDict so that an element text can never collide
        #   with one of the "Element_…"/"Attribute_…" keys above
        seenTexts = set()

        expectedID = 1
        for k, element in enumerate(bookOrderTree):
            if element.tag == self.mainElementTag:
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self.compulsoryAttributes and not self.optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self.compulsoryElements and not self.optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check ascending ID field
                ID = element.get("id")
                try:
                    intID = int(ID)
                except (TypeError, ValueError):  # Missing (None) or not a number
                    logging.error(
                        _("Missing or non-numeric ID {!r} in record {} for {}"
                          ).format(ID, k, systemName))
                else:
                    if intID != expectedID:
                        logging.error(
                            _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}"
                              ).format(k, intID, expectedID, systemName))
                expectedID += 1

                # Check that this is unique
                if element.text:
                    if element.text in seenTexts:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {}) for {}"
                              ).format(element.text, element.tag, ID, k,
                                       systemName))
                    seenTexts.add(element.text)

                # Check compulsory attributes on this main element
                for attributeName in self.compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, k))
                    elif not attributeValue:  # Present but empty (a missing one is only reported once, above)
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, k))

                # Check optional attributes on this main element
                for attributeName in self.optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None and not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, k))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, k))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self.uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        seenValues = uniqueDict["Attribute_" + attributeName]
                        if attributeValue in seenValues:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, k))
                        seenValues.add(attributeValue)

                # Check compulsory elements
                for elementName in self.compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, ID, k))
                    elif not foundElement.text:  # Only look at the text once we know the element exists
                        logging.warning(
                            _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                              ).format(elementName, ID, k))

                # Check optional elements
                for elementName in self.optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None and not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with ID {!r} (record {})"
                              ).format(elementName, ID, k))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text, ID, k))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self.uniqueElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        text = foundElement.text
                        seenElementTexts = uniqueDict["Element_" + elementName]
                        if text in seenElementTexts:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, ID, k))
                        seenElementTexts.add(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))

예제 #12

파일 보기

    def __validateSystem( self, systemName ):
        """
        Checks for basic formatting/content errors in a Bible book name system.

        systemName is the key into self.__XMLSystems of an already loaded system.

        Checks that the system's language code has three characters, then for each
            child of the tree whose tag is in self.mainElementTags it checks the
            compulsory, optional, unexpected and must-be-unique attributes and
            subelements, using the expectations stored at that tag's index.
            Any other tag gets an "Unexpected element" warning.

        Problems are reported through the logging module only -- nothing is returned.
        """
        assert systemName
        assert self.__XMLSystems[systemName]['tree']

        if len(self.__XMLSystems[systemName]["languageCode"]) != 3:
            logging.error( _("Couldn't find 3-letter language code in {!r} book names system").format( systemName ) )

        # One set of already-seen values for each field (per main element tag) that must be unique
        uniqueDict = {}
        for index in range( len(self.mainElementTags) ):
            for elementName in self.uniqueElements[index]: uniqueDict["Element_"+str(index)+"_"+elementName] = set()
            for attributeName in self.uniqueAttributes[index]: uniqueDict["Attribute_"+str(index)+"_"+attributeName] = set()

        for k,element in enumerate(self.__XMLSystems[systemName]['tree']):
            if element.tag in self.mainElementTags:
                # The expectations are stored per main element tag, so find out which set applies
                index = self.mainElementTags.index( element.tag )
                compulsoryAttributes, optionalAttributes = self.compulsoryAttributes[index], self.optionalAttributes[index]
                compulsoryElements, optionalElements = self.compulsoryElements[index], self.optionalElements[index]

                BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
                # These look at the entries for THIS tag (the outer containers hold one entry per tag)
                if not compulsoryAttributes and not optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
                if not compulsoryElements and not optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

                # Check compulsory attributes on this main element
                for attributeName in compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( _("Compulsory {!r} attribute is missing from {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )
                    elif not attributeValue: # Present but empty (a missing one is only reported once, above)
                        logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

                # Check optional attributes on this main element
                for attributeName in optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None and not attributeValue:
                        logging.warning( _("Optional {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get( attributeName )
                    if attributeName not in compulsoryAttributes and attributeName not in optionalAttributes:
                        logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}").format( attributeName, attributeValue, element.tag, k, systemName ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self.uniqueAttributes[index]:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        seenValues = uniqueDict["Attribute_"+str(index)+"_"+attributeName]
                        if attributeValue in seenValues:
                            logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {} in {}").format( attributeValue, attributeName, element.tag, k, systemName ) )
                        seenValues.add( attributeValue )

                # Check compulsory elements
                for elementName in compulsoryElements:
                    foundElement = element.find( elementName )
                    if foundElement is None:
                        logging.error( _("Compulsory {!r} element is missing (record {}) in {}").format( elementName, k, systemName ) )
                    elif not foundElement.text: # Only look at the text once we know the element exists
                        logging.warning( _("Compulsory {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

                # Check optional elements
                for elementName in optionalElements:
                    foundElement = element.find( elementName )
                    if foundElement is not None and not foundElement.text:
                        logging.warning( _("Optional {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in compulsoryElements and subelement.tag not in optionalElements:
                        logging.warning( _("Additional {!r} element ({!r}) found (record {}) in {} {}").format( subelement.tag, subelement.text, k, systemName, element.tag ) )

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self.uniqueElements[index]:
                    foundElement = element.find( elementName )
                    if foundElement is not None:
                        text = foundElement.text
                        seenTexts = uniqueDict["Element_"+str(index)+"_"+elementName]
                        if text in seenTexts:
                            # Repeats are only logged at info level for BibleDivisionNames elements
                            myLogging = logging.info if element.tag == 'BibleDivisionNames' else logging.error
                            myLogging( _("Found {!r} data repeated in {!r} element (record {}) in {}").format( text, elementName, k, systemName ) )
                        seenTexts.add( text )
            else:
                logging.warning( _("Unexpected element: {} in record {} in {}").format( element.tag, k, systemName ) )

예제 #13

파일 보기

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        Parameters:
            BBB: book code; used here for log messages and location strings.
            thisBook: receives the extracted content through addLine( marker, text ).
            chapter: XML chapter element whose attributes and verse subelements are read.

        Unexpected attributes and elements are reported through logging.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

        # Process the chapter attributes first
        chapterNumber = numVerses = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                numVerses = value  # Recognised (so not reported below) but not used further
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == OpenSongXMLBible.verseTag:
                sublocation = "verse in {} {}".format(BBB, chapterNumber)
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
                verseNumber = toVerseNumber = None
                for attrib, value in element.items():
                    if attrib == "n":
                        verseNumber = value
                    elif attrib == "t":
                        toVerseNumber = value  # Recognised (so not reported below) but not used further
                    else:
                        logging.warning(
                            "Unprocessed {!r} attribute ({}) in verse element".
                            format(attrib, value))
                if BibleOrgSysGlobals.debugFlag: assert verseNumber

                # Assemble the verse text from the element text, any italic
                # subelements (with their trailing text) and the element tail
                vText = element.text if element.text else ''
                for subelement in element:
                    sub2location = "{} in {}".format(subelement.tag,
                                                     sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sub2location, 'ks03')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sub2location, 'ks05')
                    if subelement.tag == 'i':
                        # text and tail are None (not '') when absent, so
                        # substitute '' rather than formatting a literal "None"
                        vText += '\\it {}\\it*{}'.format(
                            subelement.text or '', subelement.tail or '')
                    else:
                        logging.error(
                            "Expected to find 'i' but got {!r}".format(
                                subelement.tag))
                vText += element.tail if element.tail else ''
                if not vText:
                    logging.warning("{} {}:{} has no text".format(
                        BBB, chapterNumber, verseNumber))
                if vText:  # This is the main text of the verse (follows the verse milestone)
                    if '\n' in vText:  # This is how they represent poetry
                        # First line carries the verse number; later lines are plain q1 lines
                        for j, textBit in enumerate(vText.split('\n')):
                            if j == 0:
                                thisBook.addLine('q1', '')
                                thisBook.addLine('v',
                                                 verseNumber + ' ' + textBit)
                            else:
                                thisBook.addLine('q1', textBit)
                    else:  # Just one verse line
                        thisBook.addLine('v', verseNumber + ' ' + vText)
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))

예제 #14

파일 보기

    def __validateAndExtractBook(self, book):
        """
        Validate one XML book record and extract its contents.

        Reads the book name from the 'n' attribute, resolves it to a BBB code
        (first via self.genericBOS, then via the lazily loaded BibleBooksNames),
        creates a BibleBook with id/h/mt1 lines, passes every chapter
        subelement to __validateAndExtractChapter and finally stashes the book.

        A missing or unrecognised book name is logged as an error and nothing
        is stashed.
        """
        global BibleBooksNames

        vPrint('Verbose', debuggingThisModule,
               _("Validating OpenSong XML book…"))

        # Only the 'n' (name) attribute is understood; anything else is reported
        bookName = None
        for attribName, attribValue in book.items():
            if attribName != "n":
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attribName, attribValue))
                continue
            bookName = attribValue

        if not bookName:  # no bookName
            logging.error(_("OpenSong load can't find a book name"))
            return

        # Booknames are usually in English
        BBB = self.genericBOS.getBBBFromText(bookName)
        if not BBB:  # wasn't English
            if BibleBooksNames is None:
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            # Try non-English booknames
            BBB = BibleBooksNames.getBBBFromText(bookName)

        if not BBB:  # no BBB
            unrecognisedMessage = _(
                "OpenSong load doesn't recognize book name: {!r}")
            logging.error(unrecognisedMessage.format(bookName))
            return

        vPrint('Info', debuggingThisModule,
               _("Validating {} {}…").format(BBB, bookName))
        newBook = BibleBook(self, BBB)
        newBook.objectNameString = 'OpenSong XML Bible Book object'
        newBook.objectTypeString = 'OpenSong'

        booksCodes = BibleOrgSysGlobals.loadedBibleBooksCodes
        USFMAbbreviation = booksCodes.getUSFMAbbreviation(BBB)
        if not USFMAbbreviation:
            logging.critical(
                f"Unable to find USFM abbreviation for '{BBB}'")
            # NOTE(review): `halt` is not defined in this block -- presumably a
            # deliberate abort under strict checking; confirm
            if BibleOrgSysGlobals.strictCheckingFlag: halt
            USFMAbbreviation = 'XXA'

        # Header lines for the new book
        idText = '{} imported by {}'.format(USFMAbbreviation.upper(),
                                            programNameVersion)
        for marker, lineText in (('id', idText), ('h', bookName),
                                 ('mt1', bookName)):
            newBook.addLine(marker, lineText)

        # Every direct child is expected to be a chapter
        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error(
                    "Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.chapterTag, child.tag))
                continue
            chapterLocation = "chapter in {}".format(BBB)
            BibleOrgSysGlobals.checkXMLNoText(child, chapterLocation, 'j3jd')
            BibleOrgSysGlobals.checkXMLNoTail(child, chapterLocation, 'al1d')
            self.__validateAndExtractChapter(BBB, newBook, child)

        vPrint('Info', debuggingThisModule,
               "  Saving {} into results…".format(BBB))
        self.stashBook(newBook)

예제 #15

파일 보기

    def validateEntry(self, entry) -> None:
        """
        Check/validate the given Strongs Greek lexicon entry.

        Adds good entries to self.StrongsEntries.

        Parameters:
            entry: the XML 'entry' element; its 'strongs' attribute supplies
                the number used in location strings and log messages.

        The dict stored for an entry may contain 'word' (greek, translit,
        beta), 'pronunciation', 'see' (a list of G/H-prefixed numbers) and
        'Entry' (the flattened remaining text). It is stored under the text of
        the entry's 'strongs' subelement; an entry without that text is logged
        as an error and not stored.
        """
        if BibleOrgSysGlobals.debugFlag: assert entry.tag == 'entry'
        BibleOrgSysGlobals.checkXMLNoText(entry, entry.tag, "na19")
        BibleOrgSysGlobals.checkXMLNoTail(entry, entry.tag, "kaq9")

        # Process the entry attributes first
        strongs5 = None
        for attrib, value in entry.items():
            if attrib == 'strongs':
                strongs5 = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in main entry element".
                    format(attrib, value))
        if BibleOrgSysGlobals.debugFlag:
            assert len(strongs5) == 5 and strongs5.isdigit()

        entryResults = {}
        entryString = ""
        gettingEssentials = True
        strongs = None  # Set from the 'strongs' subelement; used as the storage key
        for j, element in enumerate(entry):
            if element.tag == "strongs":
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j == 0 and element.text
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, element.tag, "md3d")
                # Skip the tail check for 02717 and for 3203..3302 inclusive.
                # The range has to be tested as "outside": a chained
                # `3203 > n > 3302` can never be true.
                # NOTE(review): presumably those entries legitimately carry
                # tail text -- confirm against the data
                if strongs5 != '02717' and not (3203 <= int(strongs5) <= 3302):
                    BibleOrgSysGlobals.checkXMLNoTail(element, element.tag,
                                                      "f3g7")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, element.tag, "m56g")
                strongs = element.text
                if BibleOrgSysGlobals.debugFlag:
                    assert strongs5.endswith(strongs)
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip()
            elif element.tag == "greek":
                location = "greek in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "jke0")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "df35")
                # Process the attributes
                translit = greek = beta = None
                for attrib, value in element.items():
                    if attrib == "translit": translit = value
                    elif attrib == "unicode": greek = value
                    elif attrib == "BETA": beta = value
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if BibleOrgSysGlobals.debugFlag:
                    assert greek and translit and beta
                if 'word' not in entryResults:  # This is the first/main entry
                    if BibleOrgSysGlobals.debugFlag:
                        assert gettingEssentials and j == 1
                    # The tail is only checked for the main greek element
                    BibleOrgSysGlobals.checkXMLNoTail(element, location,
                                                      "ks24")
                    entryResults['word'] = (greek, translit, beta)
                else:
                    # A further greek element: it becomes part of the entry text
                    if BibleOrgSysGlobals.debugFlag: assert j > 2
                    gettingEssentials = False
                    entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML(
                        element, strongs5)
            elif element.tag == "pronunciation":
                location = "pronunciation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "0s20")
                # Process the attributes
                pronunciation = None
                for attrib, value in element.items():
                    if attrib == "strongs": pronunciation = value
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if gettingEssentials:
                    if BibleOrgSysGlobals.debugFlag:
                        assert j == 2
                        assert pronunciation
                        assert 'pronunciation' not in entryResults
                    entryResults['pronunciation'] = pronunciation
                else:
                    # Later pronunciation elements only contribute their tail text
                    if BibleOrgSysGlobals.debugFlag:
                        assert j > 2 and not gettingEssentials
                    if element.tail and element.tail.strip():
                        entryString += element.tail.strip().replace('\n', '')
            elif element.tag == "strongs_derivation":
                location = "strongs_derivation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
                derivation = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                if BibleOrgSysGlobals.debugFlag:
                    assert derivation and '\t' not in derivation and '\n' not in derivation
                entryString += derivation
            elif element.tag == "strongs_def":
                location = "strongs_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "jd28")
                definition = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                if BibleOrgSysGlobals.debugFlag:
                    assert definition and '\t' not in definition and '\n' not in definition
                entryString += definition
            elif element.tag == "kjv_def":
                location = "kjv_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                # Neither the tail nor subelements are checked for kjv_def
                KJVdefinition = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                if BibleOrgSysGlobals.debugFlag:
                    assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
                entryString += KJVdefinition
            elif element.tag == "strongsref":
                location = "strongsref in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "kls2")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "ks24")
                strongsRef = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                if BibleOrgSysGlobals.debugFlag:
                    assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
                # Rewrite GREEK references (either attribute order) as <StrongsRef>G…
                strongsRef = re.sub(r'<language="GREEK" strongs="(\d{1,5})">',
                                    r'<StrongsRef>G\1</StrongsRef>',
                                    strongsRef)
                strongsRef = re.sub(r'<strongs="(\d{1,5})" language="GREEK">',
                                    r'<StrongsRef>G\1</StrongsRef>',
                                    strongsRef)
                #strongsRef = re.sub( r'<language="HEBREW" strongs="(\d{1,5})">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( r'<strongs="(\d{1,5})" language="HEBREW">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                entryString += ' ' + strongsRef
            elif element.tag == "see":
                location = "see in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "kd02")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "0s20")
                # Process the attributes
                seeLanguage = seeStrongsNumber = None
                for attrib, value in element.items():
                    if attrib == "language": seeLanguage = value
                    elif attrib == "strongs":
                        seeStrongsNumber = value  # Note: No leading zeroes here
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if BibleOrgSysGlobals.debugFlag:
                    assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit(
                    )
                    assert seeLanguage in (
                        'GREEK',
                        'HEBREW',
                    )
                if 'see' not in entryResults: entryResults['see'] = []
                entryResults['see'].append((
                    'G' if seeLanguage == 'GREEK' else 'H') + seeStrongsNumber)
            else:
                logging.error(
                    "2d4f Unprocessed {!r} element ({}) in entry".format(
                        element.tag, element.text))

        if entryString:
            if BibleOrgSysGlobals.debugFlag:
                assert '\t' not in entryString and '\n' not in entryString
            # Rewrite empty strongsref elements (either attribute order, either
            # language) as <StrongsRef>G…/H…
            entryString = re.sub(
                r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
                r'<StrongsRef>G\1</StrongsRef>', entryString)
            entryString = re.sub(
                r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
                r'<StrongsRef>G\1</StrongsRef>', entryString)
            entryString = re.sub(
                r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
                r'<StrongsRef>H\1</StrongsRef>', entryString)
            entryString = re.sub(
                r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
                r'<StrongsRef>H\1</StrongsRef>', entryString)
            if BibleOrgSysGlobals.debugFlag:
                assert 'strongsref' not in entryString
            entryResults['Entry'] = entryString

        # Without a 'strongs' subelement there is no key to store the entry under
        if strongs is None:
            logging.error(
                "No 'strongs' element text found in entry {}".format(strongs5))
            return
        self.StrongsEntries[strongs] = entryResults

예제 #16

파일 보기

    def _validateSystem(self, punctuationTree, systemName):
        """
        Validate the records of one loaded punctuation system.

        Each child of punctuationTree whose tag is in self.mainElementTags is
        checked against the compulsory/optional/unique attribute and element
        lists held on self. Problems are reported through logging.

        Parameters:
            punctuationTree: the tree whose children are the records to check.
            systemName: name of the system (not referenced by the checks below).
        """
        assert punctuationTree

        # One list of already-seen values per unique element / attribute name
        uniqueDict = {}
        for elementName in self.uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self.uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        for k, element in enumerate(punctuationTree):
            if element.tag in self.mainElementTags:
                # Identify the record in the element messages by its tag, as
                # the attribute messages below already do
                recordID = element.tag

                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self.compulsoryAttributes and not self.optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self.compulsoryElements and not self.optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self.compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, k))
                    elif not attributeValue:  # present but empty
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, k))

                # Check optional attributes on this main element
                for attributeName in self.optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning(
                                _("Optional {!r} attribute is blank on {} element in record {}"
                                  ).format(attributeName, element.tag, k))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, k))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self.uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_" +
                                                        attributeName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, k))
                        uniqueDict["Attribute_" +
                                   attributeName].append(attributeValue)

                # Check compulsory elements
                for elementName in self.compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, recordID, k))
                    elif not foundElement.text:  # present but empty
                        logging.warning(
                            _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                              ).format(elementName, recordID, k))

                # Check optional elements
                for elementName in self.optionalElements:
                    if element.find(elementName) is not None:
                        if not element.find(elementName).text:
                            logging.warning(
                                _("Optional {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, recordID, k))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text,
                                       recordID, k))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self.uniqueElements:
                    if element.find(elementName) is not None:
                        text = element.find(elementName).text
                        if text in uniqueDict["Element_" + elementName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, recordID, k))
                        uniqueDict["Element_" + elementName].append(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))