Python BibleBook 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Bible

클래스/타입: BibleBook

hotexamples.com에서의 예제들: 47

Python BibleBook - 47개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 Bible.BibleBook에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

BibleBook(19)

objectNameString(17)

objectTypeString(17)

addLine(15)

appendLine(11)

__init__(3)

appendToLastLine(1)

예제 #1

파일 보기

파일: USXXMLBibleBook.py 프로젝트: dimleyk/BibleOrgSys

 def __init__( self, name, BBB ):
     """
     Create the USX Bible book object.
     """
     BibleBook.__init__( self, name, BBB ) # Initialise the base class
     self.objectNameString = "USX XML Bible Book object"
     self.objectTypeString = "USX"

예제 #2

파일 보기

파일: GoBible.py 프로젝트: DanTrevor/BibleOrgSys

 def __init__( self, containerBibleObject, BBB ):
     """
     Create the Go Bible book object.
     """
     BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
     self.objectNameString = 'Go Bible Book object'
     self.objectTypeString = 'GoBible'

예제 #3

파일 보기

파일: ESFMBibleBook.py 프로젝트: alerque/BibleOrgSys

 def __init__( self, containerBibleObject, BBB ):
     """
     Create the ESFM Bible book object.
     """
     BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
     self.objectNameString = "ESFM Bible Book object"
     self.objectTypeString = "ESFM"

예제 #4

파일 보기

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBB(bookName)
            if BBB:
                if BibleOrgSysGlobals.verbosityLevel > 2:
                    print(_("Validating {} {}...").format(BBB, bookName))
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(
                    BBB)
                thisBook.addLine(
                    'id', '{} imported by {}'.format(USFMAbbreviation.upper(),
                                                     ProgNameVersion))
                thisBook.addLine('h', bookName)
                thisBook.addLine('mt1', bookName)
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        BibleOrgSysGlobals.checkXMLNoText(
                            element, sublocation, 'j3jd')
                        BibleOrgSysGlobals.checkXMLNoTail(
                            element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find {!r} but got {!r}".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                if BibleOrgSysGlobals.verbosityLevel > 2:
                    print("  Saving {} into results...".format(BBB))
                self.saveBook(thisBook)
            else:
                logging.error(
                    _("OpenSong load doesn't recognize book name: {!r}").
                    format(bookName))  # no BBB
        else:
            logging.error(
                _("OpenSong load can't find a book name"))  # no bookName

예제 #5

파일 보기

파일: VerseViewXMLBible.py 프로젝트: wangsamas/BibleOrgSys

    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText(adjustedBookName)
        BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
            bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                print("Assuming that book {} {!r} is {} (not {})".format(
                    bookNumber, bookName, BBB2, BBB))
            BBB = BBB2
            #print( BBB ); halt

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print(_("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "vb26 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.chapterTag, element.tag))
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print("  Saving {} into results…".format(BBB))
            self.stashBook(thisBook)

예제 #6

파일 보기

    def __init__( self, containerBibleObject, BBB ):
        """
        Create the ESFM Bible book object.
        """
        BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
        self.objectNameString = 'ESFM Bible Book object'
        self.objectTypeString = 'ESFM'

        global sortedNLMarkers
        if sortedNLMarkers is None:
            sortedNLMarkers = sorted( BibleOrgSysGlobals.USFMMarkers.getNewlineMarkersList('Combined'), key=len, reverse=True )

예제 #7

파일 보기

파일: ESFMBibleBook.py 프로젝트: DanTrevor/BibleOrgSys

    def __init__( self, containerBibleObject, BBB ):
        """
        Create the ESFM Bible book object.
        """
        BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
        self.objectNameString = 'ESFM Bible Book object'
        self.objectTypeString = 'ESFM'

        global sortedNLMarkers
        if sortedNLMarkers is None:
            sortedNLMarkers = sorted( BibleOrgSysGlobals.USFMMarkers.getNewlineMarkersList('Combined'), key=len, reverse=True )

예제 #8

파일 보기

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating XML book..."))

        # Process the div attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib, value in book.items():
            if attrib == "bnumber":
                bookNumber = value
            elif attrib == "bname":
                bookName = value
            elif attrib == "bsname":
                bookShortName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))
        if bookNumber:
            try:
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print(_("Validating {} {}...").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "Zefania XML Bible Book object"
            thisBook.objectTypeString = "Zefania"
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == ZefaniaXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error("Expected to find {!r} but got {!r}".format(
                        ZefaniaXMLBible.chapterTag, element.tag))
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print("  Saving {} into results...".format(BBB))
            self.saveBook(thisBook)

예제 #9

파일 보기

    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Process the div attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib,value in book.items():
            if attrib=="bnumber":
                bookNumber = value
            elif attrib=="bname":
                bookName = value
            elif attrib=="bsname":
                bookShortName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
        if bookNumber:
            try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'Haggai XML Bible Book object'
            thisBook.objectTypeString = 'Haggai'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == HaggaiXMLBible.captionTag:
                    sublocation = "caption in {}".format( BBB )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'jhl6' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'jk21' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'kjh6' )
                    thisBook.addLine( 'mt', element.text )
                elif element.tag == HaggaiXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, element.tag ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
            self.stashBook( thisBook )

예제 #10

파일 보기

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBB(bookName)
            if BBB:
                if Globals.verbosityLevel > 2:
                    print(_("Validating {} {}...").format(BBB, bookName))
                thisBook = BibleBook(self.name, BBB)
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        Globals.checkXMLNoText(element, sublocation, 'j3jd')
                        Globals.checkXMLNoTail(element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find '{}' but got '{}'".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                if Globals.verbosityLevel > 2:
                    print("  Saving {} into results...".format(BBB))
                self.saveBook(thisBook)
            logging.error(
                _("OpenSong load doesn't recognize book name: '{}'").format(
                    bookName))
        logging.error(_("OpenSong load can't find a book name"))

예제 #11

파일 보기

    def __validateAndExtractBook( self, book, bookNumber ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Process the div attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        if bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None:
            adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText( adjustedBookName )
        BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        if BBB2 != BBB: # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
            BBB = BBB2
            #print( BBB ); halt

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
            self.stashBook( thisBook )

예제 #12

파일 보기

파일: ZefaniaXMLBible.py 프로젝트: dimleyk/BibleOrgSys

    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3: print( _("Validating XML book...") )

        # Process the div attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib,value in book.items():
            if attrib=="bnumber":
                bookNumber = value
            elif attrib=="bname":
                bookName = value
            elif attrib=="bsname":
                bookShortName = value
            else: logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value ) )
        if bookNumber:
            try: BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB( bookName )

        if BBB:
            if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
            thisBook = BibleBook( self.name, BBB )
            thisBook.objectNameString = "Zefania XML Bible Book object"
            thisBook.objectTypeString = "Zefania"
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == ZefaniaXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    Globals.checkXMLNoText( element, sublocation, 'j3jd' )
                    Globals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.chapterTag, element.tag ) )
            if Globals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
            self.saveBook( thisBook )

예제 #13

파일 보기

    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """
        global BibleBooksNames

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating OpenSong XML book…") )

        # Process the div attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )
        if bookName:
            BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
            if not BBB: # wasn't English
                if BibleBooksNames is None:
                    BibleBooksNames = BibleBooksNamesSystems().loadData()
                BBB = BibleBooksNames.getBBBFromText( bookName ) # Try non-English booknames
                #print( "bookName", bookName, BBB )
            if BBB:
                if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = 'OpenSong XML Bible Book object'
                thisBook.objectTypeString = 'OpenSong'
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
                thisBook.addLine( 'id', '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion ) )
                thisBook.addLine( 'h', bookName )
                thisBook.addLine( 'mt1', bookName )
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format( BBB )
                        BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                        BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                        self.__validateAndExtractChapter( BBB, thisBook, element )
                    else: logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, element.tag ) )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
                self.stashBook( thisBook )
            else: logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) ) # no BBB
        else: logging.error( _("OpenSong load can't find a book name") ) # no bookName

예제 #14

파일 보기

파일: OpenSongXMLBible.py 프로젝트: beniza/BibleOrgSys

    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3: print( _("Validating OpenSong XML book...") )

        # Process the div attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value ) )
        if bookName:
            BBB = self.genericBOS.getBBB( bookName )
            if BBB:
                if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = Globals.BibleBooksCodes.getUSFMAbbreviation( BBB )
                thisBook.appendLine( 'id', '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion ) )
                thisBook.appendLine( 'h', bookName )
                thisBook.appendLine( 'mt1', bookName )
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format( BBB )
                        Globals.checkXMLNoText( element, sublocation, 'j3jd' )
                        Globals.checkXMLNoTail( element, sublocation, 'al1d' )
                        self.__validateAndExtractChapter( BBB, thisBook, element )
                    else: logging.error( "Expected to find '{}' but got '{}'".format( OpenSongXMLBible.chapterTag, element.tag ) )
                if Globals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
                self.saveBook( thisBook )
            else: logging.error( _("OpenSong load doesn't recognize book name: '{}'").format( bookName ) ) # no BBB
        else: logging.error( _("OpenSong load can't find a book name") ) # no bookName

예제 #15

파일 보기

파일: DrupalBible.py 프로젝트: openscriptures/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        status = 0 # 1 = getting chapters, 2 = getting verse data
        lastLine, lineCount = '', 0
        BBB = lastBBB = None
        bookDetails = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1:
                    if line[0]==chr(65279): #U+FEFF
                        logging.info( "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                        line = line[1:] # Remove the UTF-16 Unicode Byte Order Marker (BOM)
                    elif line[:3] == 'ï»¿': # 0xEF,0xBB,0xBF
                        logging.info( "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                        line = line[3:] # Remove the UTF-8 Unicode Byte Order Marker (BOM)
                if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines

                #print ( 'DB file line is "' + line + '"' )
                if line[0] == '#': continue # Just discard comment lines
                lastLine = line
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning( "Unknown DrupalBible first line: {}".format( repr(line) ) )

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else: # Get the version name details
                        bits = line.split( '|' )
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else: # Get the book name details
                        bits = line.split( '|' )
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert bookShortName == bookCode
                        BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                        BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                        bookDetails[BBB] = bookFullName, bookShortName, numChapters

                elif status == 2: # Get the verse text
                    bits = line.split( '|' )
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    #chapterNumber, verseNumber = int( chapterNumberString ), int( verseNumberString )
                    if lineMark: print( repr(lineMark) ); halt
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                    BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB:
                        if lastBBB is not None:
                            self.stashBook( thisBook )
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = 'DrupalBible Bible Book object'
                        thisBook.objectTypeString = 'DrupalBible'
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.addLine( 'c', chapterNumberString )
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace( '<', '\\it ' ).replace( '>', '\\it*' )
                    thisBook.addLine( 'v', verseNumberString + ' ' + verseText )

                else: halt

        # Save the final book
        self.stashBook( thisBook )
        self.doPostLoadProcessing()

예제 #16

파일 보기

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lastLine, lineCount = '', 0
        BBB = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and self.encoding.lower(
                ) == 'utf-8' and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "      VPLBible.load: Detected UTF-16 Byte Order Marker"
                    )
                    line = line[1:]  # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #print ( 'VLP file line is "' + line + '"' )
                if line[0] == '#': continue  # Just discard comment lines

                bits = line.split(' ', 2)
                #print( self.givenName, BBB, bits )
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split(
                        ':')
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    assert (2 <= len(bookCode) <= 4)
                if BibleOrgSysGlobals.debugFlag:
                    assert (chapterNumberString.isdigit())
                if not verseNumberString.isdigit():
                    logging.error(
                        "Invalid verse number field at {}/{} {}:{!r}".format(
                            bookCode, BBB, chapterNumberString,
                            verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert (verseNumberString.isdigit())
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.saveBook(thisBook)
                    #if bookCode in ('Ge',): BBB = 'GEN'
                    #elif bookCode in ('Le',): BBB = 'LEV'
                    ##elif bookCode in ('Jud',): BBB = 'JDG'
                    #elif bookCode in ('Es',): BBB = 'EST'
                    #elif bookCode in ('Pr',): BBB = 'PRO'
                    #elif bookCode in ('So',): BBB = 'SNG'
                    #elif bookCode in ('La',): BBB = 'LAM'
                    #elif bookCode in ('Jude',): BBB = 'JDE'
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB(
                        bookCode)  # Try to guess
                    if BBB:
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = "VPL Bible Book object"
                        thisBook.objectTypeString = "VPL"
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical(
                            "VPLBible could not figure out {!r} book code".
                            format(bookCode))
                        if BibleOrgSysGlobals.debugFlag: halt

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle special formatting
                #   [brackets] are for Italicized words
                #   <brackets> are for the Words of Christ in Red
                #   «brackets»  are for the Titles in the Book  of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0] == '«':
                    #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                    if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                        vBits = vText[1:].split('»')
                        #print( "vBits", vBits )
                        thisBook.addLine('d', vBits[0])  # Psalm title
                        vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook(thisBook)
        self.doPostLoadProcessing()

예제 #17

파일 보기

파일: EasyWorshipBible.py 프로젝트: openscriptures/BibleOrgSys

    def load( self ):
        """
        Load the compressed data file and import book objects.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
        with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done
            fileBytes = myFile.read()
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            print( "  {:,} bytes read".format( len(fileBytes) ) )

        keep = OrderedDict()
        index = 0

        # Block 1 is 32-bytes long and always the same for EW2009 Bibles
        #if debuggingThisModule: print( 'introBlock', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] )
        keep['introBlock'] = (index,fileBytes[index:index+32])
        hString = ''
        for j in range( 0, 32 ):
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            hString += chr( char8 )
        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
            assert hString == 'EasyWorship Bible Text'
        introBlockb = fileBytes[index+j:index+32]
        #if BibleOrgSysGlobals.debugFlag: print( 'introBlockb', hexlify( introBlockb ), introBlockb )
        assert introBlockb == b'\x1a\x02<\x00\x00\x00\xe0\x00\x00\x00' # b'1a023c000000e0000000'
        # Skipped some (important?) binary here??? but it's the same for every module
        index += 32

        # Block 2 is 56-bytes long
        moduleNameBlock = fileBytes[index:index+56]
        keep['moduleNameBlock'] = (index,moduleNameBlock)
        #if debuggingThisModule: print( 'moduleNameBlock', hexlify( moduleNameBlock ), moduleNameBlock )
        nString = ''
        for j in range( 0, 32 ):
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            nString += chr( char8 )
        #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: print( 'nString', repr(nString), index )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( "EasyWorshipBible.load: " + _("Setting module name to {!r}").format( self.name ) )
        self.name = nString
        #assert self.name # Not there for amp and gkm
        moduleNameBlockb = fileBytes[index+j:index+56]
        #if BibleOrgSysGlobals.debugFlag: print( 'moduleNameBlockb', len(moduleNameBlockb), hexlify( moduleNameBlockb ), moduleNameBlockb )
        #assert moduleNameBlockb.endswith( b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00' ) # b'000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000'
        for ix in range( index+j, index+56 ): # Mostly zeroes remaining
            if ix == 84: # What does this mean???
                value = fileBytes[ix]
                assert value in (0,1,2,3,4,5) # bbe=0, alb=1, esv2=2, esv=3, asv=4 nasb=5 Revision number???
                keep['byte84'] = (index,value)
            else: assert fileBytes[ix] == 0
        index += 56

        # Get the optional booknames and the raw data for each book into a list
        rawBooks = []
        for bookNumber in range( 1, 66+1 ):
            bookInfoBlock = fileBytes[index:index+51]
            blockName = 'bookInfoBlock-{}'.format( bookNumber )
            keep[blockName] = (index,bookInfoBlock)
            #if debuggingThisModule: print( blockName, hexlify( bookInfoBlock ), bookInfoBlock )
            bookName = ''
            for j in range( 0, 32 ):
                char8 = fileBytes[index+j]
                #print( char8, repr(char8) )
                if char8 < 0x20: break # bookName seems quite optional -- maybe the English ones are assumed if empty???
                bookName += chr( char8 )
            assert fileBytes[index+j:index+51] == b'\x00' * (51-j) # Skipped some zeroes here
            index += 51
            if bookName and bookName[-1] == '.': bookName = bookName[:-1] # Remove final period
            #if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
                #print( 'bookName', repr(bookName) )
            numChapters = fileBytes[index]
            numVerses = []
            for j in range( 0, numChapters ):
                numVerses.append( fileBytes[index+j+1] )
            #print( "here1", 157-j-2, hexlify(fileBytes[index+j+2:index+157]), fileBytes[index+j+2:index+157] )
            if self.abbreviation != 'fn1938': # Why does this fail???
                assert fileBytes[index+j+2:index+157] == b'\x00' * (157-j-2) # Skipped some zeroes here
            index += 157
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( ' {!r} numChapters={} verses={}'.format( bookName, numChapters, numVerses ) )
            bookStart, = struct.unpack( "<I", fileBytes[index:index+4] )
            assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here
            index += 8
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( '    bookStart is at {:,}'.format( bookStart ) )
            bookLength, = struct.unpack( "<I", fileBytes[index:index+4] )
            assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here
            index += 8
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( '    {} bookLength is {:,} which goes to {:,}'.format( bookNumber, bookLength, bookStart+bookLength ) )
            bookBytes = fileBytes[bookStart:bookStart+bookLength] # Looking ahead into the file
            rawBooks.append( (bookName, numChapters, numVerses, bookStart, bookLength, bookBytes) )
            if bookLength == 0: # e.g., gkm Philippians (book number 50)
                logging.critical( "Booknumber {} is empty in {}".format( bookNumber, self.abbreviation ) )
            else:
                #if debuggingThisModule:
                    #print( "cHeader1 for {}: {}={} {}={}".format( self.abbreviation, bookBytes[0], hexlify(bookBytes[0:1]), bookBytes[1], hexlify(bookBytes[1:2]) ) )
                assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header (for compression levels 7-9)
        assert index == 14872 # 32 + 56 + 224*66

        workNameBlock = fileBytes[index:index+30] # 30 here is just a maximum, not fixed
        keep['workNameBlock'] = (index,workNameBlock) # This block starts with a length, then a work name, e.g., ezFreeASV
        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            #print( 'workNameBlock', index, hexlify(workNameBlock), workNameBlock )
        length3, = struct.unpack( "<I", fileBytes[index:index+4] )
        #print( "length3", length3 ) # Seems to include the compressed string plus six more bytes
        keep['length3'] = (index,length3)
        if length3:
            bookInfoBlock = fileBytes[index+4:index+4+length3-4-6]
            if debuggingThisModule:
                print( "cHeader2 for {}: {}={} {}={}".format( self.abbreviation, bookInfoBlock[0], hexlify(bookInfoBlock[0:1]), bookInfoBlock[1], hexlify(bookInfoBlock[1:2]) ) )
            assert bookInfoBlock[0]==0x78 and bookInfoBlock[1]==0xda # Zlib compression header (for compression levels 7-9)
            byteResult = zlib.decompress( bookInfoBlock )
            #rewriteResult1 = zlib.compress( byteResult, 9 )
            #byteResult1 = zlib.decompress( rewriteResult1 )
            #compressor = zlib.compressobj(level=9, method=zlib.DEFLATED, wbits=15, memLevel=8, strategy=zlib.Z_DEFAULT_STRATEGY )
            #rewriteResult2 = compressor.compress( byteResult )
            #rewriteResult2 += compressor.flush()
            #byteResult2 = zlib.decompress( rewriteResult2 )
            #print( "rewrite1 {} {} {}\n         {} {} {}\n         {} {} {}\n      to {} {}\n      to {} {}\n      to {} {}" \
                        #.format( len(bookInfoBlock), hexlify(bookInfoBlock), bookInfoBlock,
                                 #len(rewriteResult1), hexlify(rewriteResult1), rewriteResult1,
                                 #len(rewriteResult2), hexlify(rewriteResult2), rewriteResult2,
                                 #len(byteResult), byteResult,
                                 #len(byteResult1), byteResult1,
                                 #len(byteResult2), byteResult2 ) )
            textResult = byteResult.decode( 'utf8' )
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( "Block4: Got {} chars {!r} from {} bytes".format( len(textResult), textResult, length3 ) )
            assert textResult.startswith('ezFree') or textResult.startswith('ezPaid')
            keep['workName'] = (index+4,textResult)
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( "EasyWorshipBible.load: " + _("Setting module work name to {!r}").format( textResult ) )
            if self.name: self.workName = textResult
            else: # Should rarely happen
                self.name = self.workName = textResult
            workNameAppendage = fileBytes[index+4+length3-6-4:index+4+length3-4]
            #print( "workNameAppendage", len(workNameAppendage), hexlify(workNameAppendage), workNameAppendage )
            keep['workNameAppendage'] = (index+4+length3-6-4,workNameAppendage)
            assert workNameAppendage[:4] == b'QK\x03\x04'
            uncompressedNameLength, = struct.unpack( "<B", workNameAppendage[4:5] )
            assert workNameAppendage[5:] == b'\x00'
            assert len(textResult) == uncompressedNameLength
        keep['length3'] = (index,length3)
        index += length3
        #print( self.abbreviation, len(textResult), repr(textResult), 'length3', length3, len(textResult)+18 )
        assert length3 == len(textResult) + 18

        bookDataStartIndex = rawBooks[0][3]
        #print( "bookDataStartIndex", bookDataStartIndex )

        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            #print( 'After known contents @ {:,}'.format( index ), hexlify( fileBytes[index:index+60] ), fileBytes[index:index+60] )

        block0080 = fileBytes[index:bookDataStartIndex]
        #print( "block0080", index, len(block0080), hexlify(block0080), block0080 )
        keep['block0080'] = (index,block0080)
        assert block0080 == b'\x00\x00\x08\x00' # b'00000800'
        index += len( block0080 )
        keep['bookDataStartIndex'] = (index,bookDataStartIndex)
        assert index == bookDataStartIndex # Should now be at the start of the first book (already fetched above)

        # Look at extra stuff right at the end of the file
        assert len(rawBooks) == 66
        index = bookStart + bookLength # of the last book
        endBytes = fileBytes[index:]
        #if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            #print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
        assert len(endBytes) == 16
        keep['endBytes'] = (index,endBytes)
        assert endBytes == b'\x18:\x00\x00\x00\x00\x00\x00ezwBible' # b'183a000000000000657a774269626c65'
        del fileBytes # Not needed any more

        # Now we have to decode the book text (compressed about 4x with zlib)
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "EWB loading books for {}…".format( self.abbreviation ) )
        for j, BBB in enumerate( BOS.getBookList() ):
            bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
            if bookLength == 0:
                assert not bookBytes
                logging.critical( "   Skipped empty {}".format( BBB ) )
                continue
            if BibleOrgSysGlobals.verbosityLevel > 2: print( '  Decoding {}…'.format( BBB ) )
            bookBytes, bookExtra = bookBytes[:-10], bookBytes[-10:]
            assert len(bookExtra) == 10
            keep['bookExtra-{}'.format(j+1)] = (-10,bookExtra)
            assert bookExtra[:4] == b'QK\x03\x04'
            uncompressedBookLength, = struct.unpack( "<I", bookExtra[4:8] )
            assert bookExtra[8:] == b'\x08\x00'
            byteResult = zlib.decompress( bookBytes )
            assert len(byteResult) == uncompressedBookLength
            try: textResult = byteResult.decode( 'utf8' )
            except UnicodeDecodeError:
                logging.critical( "Unable to decode {} {} bookText -- maybe it's not utf-8???".format( self.abbreviation, BBB ) )
                continue
            if debuggingThisModule:
                rewriteResult1 = zlib.compress( byteResult, 9 )
                byteResult1 = zlib.decompress( rewriteResult1 )
                if rewriteResult1 != bookBytes:
                    print( "\nbookBytes", len(bookBytes), hexlify(bookBytes) )
                    print( "\nrewriteResult1", len(rewriteResult1), hexlify(rewriteResult1) )
                    halt
                if byteResult1 != byteResult:
                    print( len(byteResult), hexlify(byteResult) )
                    print( len(byteResult1), hexlify(byteResult1) )
                    halt
            if '\t' in textResult:
                logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
                textResult = textResult.replace( '\t', ' ' )
            #print( textResult )
            if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

            C, V = '-1', '-1' # So first/id line starts at -1:0
            for line in textResult.split( '\r\n' ):
                if not line: continue # skip blank lines
                #if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    #print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
                assert line[0].isdigit()
                assert ':' in line[:4]
                CV,verseText = line.split( ' ', 1 )
                newC,newV = CV.split( ':' )
                #print( newC, V, repr(verseText) )
                if newC != C:
                    if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order
                        logging.critical( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                    else: assert int(newC) > int(C)
                    C, V = newC, '0'
                    thisBook.addLine( 'c', C )
                if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                    if int(newV) < int(V): break
                elif self.abbreviation=='drv' and BBB in ('GEN','EXO','NUM',): # Handle a bug -- Gen 18:1&12, Exo 28:42&43 out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                else:
                    try: assert int(newV) > int(V)
                    except ValueError:
                        logging.critical( "Something's not an integer around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
                    except AssertionError:
                        logging.critical( "Something's out of order around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
                V = newV
                thisBook.addLine( 'v', V + ' ' + verseText )

            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
            self.stashBook( thisBook )

        self.doPostLoadProcessing()
        return keep

예제 #18

파일 보기

파일: ForgeForSwordSearcherBible.py 프로젝트: wangsamas/BibleOrgSys

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search('^; TITLE:\\s', line)
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print("First line got type {!r} match from {!r}".
                                  format(match.group(0), line))
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                                .format(firstLine, thisFilename))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue

                #print ( 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith('; TITLE:'):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith('; ABBREVIATION:'):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith('; HAS ITALICS'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES:'):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS REDLETTER'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0] == ';':
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                    continue  # Just discard comment lines

                # Process the main segment
                if line.startswith('$$ '):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #print( "pointer", repr(pointer) )
                    if pointer and pointer[0] == '{' and pointer[-1] == '}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #print( "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else:  # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split(' ', 1)
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split(
                                ':')
                            chapterNumber = int(chapterNumberString)
                            verseNumber = int(verseNumberString)
                            if bookCode != lastBookCode:  # We've started a new book
                                if bookCode in ('Ge', ): BBB = 'GEN'
                                elif bookCode in ('Le', ): BBB = 'LEV'
                                elif bookCode in ('La', ):
                                    BBB = 'LAM'
                                    ##elif bookCode in ('Es',): BBB = 'EST'
                                    ##elif bookCode in ('Pr',): BBB = 'PRO'
                                    #elif bookCode in ('So',): BBB = 'SNG'
                                    #elif bookCode in ('La',): BBB = 'LAM'
                                    #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #print( "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText(
                                        bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOS81.getBBBFromText(
                                            bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOSx.getBBBFromText(
                                            bookCode)  # Try to guess
                                    #print( "4BBB =", repr(BBB) )
                        else:
                            print("Unexpected number of bits", self.givenName,
                                  BBB, bookCode, chapterNumberString,
                                  verseNumberString, len(bits), bits)
                    continue  # Just save the pointer information which refers to the text on the next line
                else:  # it's not a $$ line
                    text = line
                    #print( "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else
                                             '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace('(<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>)', '\\x*')
                        vText = vText.replace('<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>', '\\x*')
                        #if '\\' in vText: print( 'ForgeForSwordSearcher vText', repr(vText) )
                        #print( BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search('\\{(.+?)\\}', vText)
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                                chapterNumber, verseNumber, match.group(1))
                            vText = vText[:match.start(
                            )] + footnoteText + vText[
                                match.end():]  # Replace this footnote
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\{(.+?)\\}', vText)
                        # Convert [stuff] to added fields
                        match = re.search('\\[(.+?)\\]', vText)
                        while match:
                            addText = '\\add {}\\add*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\[(.+?)\\]', vText)
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search('\\+r/(.+?)-r/', vText)
                        while match:
                            addText = '\\wj {}\\wj*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\+r/(.+?)-r/', vText)
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning(
                                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                                    .format(BBB, chapterNumberString,
                                            verseNumberString, vText))
                                break

                if bookCode:
                    if bookCode != lastBookCode:  # We've started a new book
                        if lastBookCode != -1:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "ForgeForSwordSearcherBible could not figure out {!r} book code"
                                .format(bookCode))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCode,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCode yet
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                        .format(line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #print( "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata(
                'Forge4SS')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()

예제 #19

파일 보기

    def load( self ):
        """
        Load the compressed data file and import book elements.
        """
        import zlib
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
        with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done
            fileBytes = myFile.read()
        if BibleOrgSysGlobals.debugFlag: print( "  {:,} bytes read".format( len(fileBytes) ) )

        keep = {}
        index = 0
        #print( 'block1', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] )
        keep['block1'] = fileBytes[index:index+32]
        hString = ''
        for j in range( 0, 32 ):
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            hString += chr( char8 )
        if BibleOrgSysGlobals.debugFlag: print( 'block1b', hexlify( fileBytes[index+j:index+32] ) )
        # Skipped some (important?) binary here
        index += 32
        if BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
        assert hString == 'EasyWorship Bible Text'

        #print( 'block2', hexlify( fileBytes[index:index+56] ), fileBytes[index:index+56] )
        keep['block2'] = fileBytes[index:index+56]
        nString = ''
        for j in range( 0, 32 ):
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            nString += chr( char8 )
        # Skipped some zeroes here
        index += 56
        if BibleOrgSysGlobals.debugFlag: print( 'nString', repr(nString), index )
        self.name = nString

        rawBooks = []
        for b in range( 1, 66+1 ):
            bookAbbrev = ''
            for j in range( 0, 32 ):
                char8 = fileBytes[index+j]
                #print( char8, repr(char8) )
                if char8 < 0x20: break
                bookAbbrev += chr( char8 )
            # Skipped some zeroes here
            index += 51
            if bookAbbrev and bookAbbrev[-1] == '.': bookAbbrev = bookAbbrev[:-1] # Remove final period
            if BibleOrgSysGlobals.verbosityLevel > 2: print( 'bookAbbrev', repr(bookAbbrev) )
            numChapters = fileBytes[index]
            numVerses = []
            for j in range( 0, numChapters ):
                numVerses.append( fileBytes[index+j+1] )
            # Skipped some zeroes here
            index += 157
            if BibleOrgSysGlobals.debugFlag:
                print( ' ', numChapters, numVerses )
            bookStart, = struct.unpack( "<I", fileBytes[index:index+4] )
            # Skipped some zeroes here
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print( '  bookStart', bookStart )
            bookLength, = struct.unpack( "<I", fileBytes[index:index+4] )
            # Skipped some zeroes here
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print( '  bookLength', bookLength, bookStart+bookLength )
            bookBytes = fileBytes[bookStart:bookStart+bookLength]
            assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header
            rawBooks.append( (bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes) )

        if BibleOrgSysGlobals.debugFlag: print( 'unknown block3', index, hexlify( fileBytes[index:index+30] ) )
        keep['block3'] = fileBytes[index:index+30]
        length3, = struct.unpack( "<I", fileBytes[index:index+4] )
        if length3:
            block3 = fileBytes[index+4:index+4+length3-4]
            byteResult = zlib.decompress( block3 )
            textResult = byteResult.decode( 'utf8' )
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( "Got", len(textResult), textResult, 'from', length3 )
            keep['block3n'] = textResult
            if self.name: print( 'Overwriting module name {!r} with {!r}'.format( self.name, textResult ) )
            self.name = textResult
        index += length3
        if BibleOrgSysGlobals.debugFlag: print( 'end of contents', index, hexlify( fileBytes[index:index+60] ) )
        keep['block4'] = rawBooks[0][3]

        block5 = fileBytes[index:rawBooks[0][3]]
        keep['block5'] = block5
        index += len( block5 )
        #if self.abbreviation in ( 'TB', ): # Why don't the others work
        assert index == rawBooks[0][3] # Should now be at the start of the first book (already fetched above)

        assert len(rawBooks) == 66
        # Look at extra stuff at end
        endBytes = fileBytes[bookStart+bookLength:]
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
        assert len(endBytes) == 16
        keep['block9'] = endBytes
        # Skipped some binary and some text here
        del fileBytes

        # Now we have to decode the book text (compressed about 4x with zlib)
        for j, BBB in enumerate( BOS.getBookList() ):
            if BibleOrgSysGlobals.verbosityLevel > 2: print( '  Decoding {}…'.format( BBB ) )
            bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
            byteResult = zlib.decompress( bookBytes )
            textResult = byteResult.decode( 'utf8' )
            if '\t' in textResult:
                logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
                textResult = textResult.replace( '\t', ' ' )
            #print( textResult )
            if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

            C = V = '0'
            for line in textResult.split( '\r\n' ):
                if not line: continue # skip blank lines
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
                assert line[0].isdigit()
                assert ':' in line[:4]
                CV,verseText = line.split( ' ', 1 )
                newC,newV = CV.split( ':' )
                #print( newC, V, repr(verseText) )
                if newC != C:
                    if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order
                        print( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                    else: assert int(newC) > int(C)
                    C, V = newC, '0'
                    thisBook.addLine( 'c', C )
                if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                    if int(newV) < int(V): break
                elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order
                    print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order
                    print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order
                    print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order
                    print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                else:
                    try: assert int(newV) > int(V)
                    except ValueError:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "Something's not an integer around {} {}:{} {}".format( BBB, C, V, verseText ) )
                V = newV
                thisBook.addLine( 'v', V + ' ' + verseText )

            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
            self.stashBook( thisBook )

        self.doPostLoadProcessing()
        return keep

예제 #20

파일 보기

파일: UnboundBible.py 프로젝트: DanTrevor/BibleOrgSys

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['Unbound'] = {}

        lastLine, lineCount = '', 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      UnboundBible.load: Detected Unicode Byte Order Marker (BOM)" )
                #line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                if line and line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #print ( 'UB file line is "' + line + '"' )
                if line[0] == '#':
                    hashBits = line[1:].split('\t')
                    if len(hashBits) == 2 and hashBits[
                            1]:  # We have some valid meta-data
                        self.suppliedMetadata['Unbound'][
                            hashBits[0]] = hashBits[1]
                        #if hashBits[0] == 'name': self.name = hashBits[1]
                        #elif hashBits[0] == 'filetype': self.filetype = hashBits[1]
                        #elif hashBits[0] == 'copyright': self.copyright = hashBits[1]
                        #elif hashBits[0] == 'abbreviation': self.abbreviation = hashBits[1]
                        #elif hashBits[0] == 'language': self.language = hashBits[1]
                        #elif hashBits[0] == 'note': self.note = hashBits[1]
                        #elif hashBits[0] == 'columns': self.columns = hashBits[1]
                        #logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( hashBits[0], hashBits[1] ) )
                    continue  # Just discard comment lines

                bits = line.split('\t')
                #print( self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith(
                        'lxx_a_parsing_'):
                    logging.warning(
                        _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                    continue
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    halt

                if NRSVA_bookCode: assert len(NRSVA_bookCode) == 3
                if NRSVA_chapterNumberString:
                    assert NRSVA_chapterNumberString.isdigit()
                if NRSVA_verseNumberString:
                    assert NRSVA_verseNumberString.isdigit()

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag: assert len(bookCode) == 3
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumberString.isdigit()
                if BibleOrgSysGlobals.debugFlag:
                    assert verseNumberString.isdigit()

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString {!r} in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode,
                            chapterNumberString, verseNumberString))

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText: continue  # Just ignore blank verses I think
                if vText == '+':
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumberString.isdigit()
                    sequenceNumber = int(sequenceNumberString)
                    if BibleOrgSysGlobals.debugFlag:                        assert sequenceNumber > lastSequence or \
self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.stashBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(
                        bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'Unbound Bible Book object'
                    thisBook.objectTypeString = 'Unbound'
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.stashBook(thisBook)
        self.applySuppliedMetadata('Unbound')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

예제 #21

파일 보기

파일: USFXXMLBible.py 프로젝트: beniza/BibleOrgSys

class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder '{}' is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                ignore = False
                for ending in filenameEndingsToIgnore:
                    if somethingUpper.endswith( ending): ignore=True; break
                if ignore: continue
                if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        #print( self.sourceFolder, foundFolders, len(foundFiles), foundFiles )
        numFound = 0
        for thisFilename in sorted( foundFiles ):
            firstLines = Globals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if Globals.verbosityLevel > 2: print( "USFXB (unexpected) first line was '{}' in {}".format( firstLines, thisFilename ) )
                continue
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if Globals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif looksHopeful and Globals.verbosityLevel > 2: print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.
        """
        if Globals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

                                #if Globals.verbosityLevel > 2: print( _("  It seems we have {}...").format( BBB ) )
                        #self.thisBook = BibleBook( self, BBB )
                        #self.thisBook.objectNameString = "OSIS XML Bible Book object"
                        #self.thisBook.objectTypeString = "OSIS"
                        #self.haveBook = True

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: '{}'.".format( self.sourceFilepath, errorString ) )
            return
        if Globals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (osis) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            Globals.checkXMLNoText( self.tree, location, '4f6h' )
            Globals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                #print( "attrib", repr(attrib), repr(value) )
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None
            for element in self.tree:
                #print( "element", repr(element.tag) )
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    Globals.checkXMLNoTail( element, sublocation, 'cff3' )
                    Globals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                    ##Globals.checkXMLNoSubelements( element, sublocation, '54f2' )
                    #Globals.checkXMLNoTail( element, sublocation, 'hd35' )
                    ## Process the attributes
                    #idField = bookStyle = None
                    #for attrib,value in element.items():
                        #if attrib=='id' or attrib=='code':
                            #idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            ##if idField != BBB:
                            ##    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        #elif attrib=='style':
                            #bookStyle = value
                        #else:
                            #logging.warning( _("gfw2 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
                    #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if Globals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in '{}'".format( self.sourceFolder ) )
            for thisFilename in foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if Globals.verbosityLevel > 2: print( "Have possible USFX ID '{}'".format( USXId ) )
                            BBB = Globals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if Globals.verbosityLevel > 2: print( "BBB is '{}'".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if Globals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        Globals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, C )
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, C ):
        """
        Load the paragraph (p or q) container from the XML data file.
        """
        #if Globals.verbosityLevel > 3:
            #print( _("USFXXMLBible.loadParagraph: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        V = None
        pText = paragraphElement.text
        Globals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm':
                sfm = value
            elif attrib == 'level':
                level = value
            elif attrib == 'style':
                style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            #print( "element", repr(element.tag) )
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                Globals.checkXMLNoText( element, location, 'crc2' )
                Globals.checkXMLNoSubelements( element, location, 'lct3' )
                lastV, V = V, None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.appendLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                Globals.checkXMLNoText( element, location, 'lsc3' )
                Globals.checkXMLNoTail( element, location, 'mfy4' )
                Globals.checkXMLNoAttributes( element, location, 'bd24' )
                Globals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", paragraphLocation, C, V, repr(element.text) )
                self.loadFootnote( element, location )
            elif element.tag == 'x':
                #print( "USFX.loadParagraph Found xref at", paragraphLocation, C, V, repr(element.text) )
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'kdf0' )
                Globals.checkXMLNoAttributes( element, location, 'lkj1' )
                Globals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                #if target not in ('w','ior',): print( "ref sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
                #print( "Saved", '\\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if Globals.debugFlag: halt
            elif element.tag == 'milestone':
                print( "What is loadParagraph milestone?" )
                if Globals.debugFlag: halt
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location ):
        """
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        Globals.checkXMLNoAttributes( element, location, 'sd12' )
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            #print( "element", repr(element.tag) )
            if subelement.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", sublocation, C, V, repr(subelement.text) )
                self.loadFootnote( subelement, sublocation )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
                halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'ff36' )
        Globals.checkXMLNoAttributes( element, location, 'cf35' )
        figDict = { 'description':'', 'catalog':'', 'size':'', 'location':'', 'copyright':'', 'caption':'', 'reference':'' }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            Globals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            Globals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            Globals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = ''
        for j,tag in enumerate( ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',) ):
            newString += ('' if j==0 else '|') + figDict[tag]
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'kg92' )
        Globals.checkXMLNoTail( element, location, 'ka92' )
        Globals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                #print( "table", sublocation )
                self.thisBook.appendLine( 'tr', '' )
                Globals.checkXMLNoText( subelement, sublocation, 'sg32' )
                Globals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                Globals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    Globals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    Globals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    marker = tag + (level if level else '')
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
            else:
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, self.thisBook.BBB, C, V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location ):
        """
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadFootnote", repr(caller), repr(text), repr(tail), repr(marker), repr(fText), repr(fTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',):
                #print( "USFX.loadFootnote found", repr(caller), repr(marker), repr(fText), repr(fTail) )
            if Globals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',) )
            if marker=='ref':
                assert( fText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText ) )
                if marker[0] == 'f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2=='ref':
                            print( sub2location )
                            assert( not fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadCrossreference", repr(caller), repr(text), repr(tail), repr(marker), repr(xText), repr(xTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','xo','xt',):
                #print( "USFX.loadCrossreference found", repr(caller), repr(marker), repr(xText), repr(xTail) )
            if Globals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            assert( not xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )

예제 #22

파일 보기

class GreekNT( Bible ):
    """
    Class for handling a Greek NT object (which may contain one or more Bible books)

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ):
        """
        Constructor: expects the filepath of the source folder.
        Loads (and crudely validates the file(s)) into ???.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = 'Greek NT Bible object'
        self.objectTypeString = 'GreekNT'

        # Now we can set our object variables
        self.sourceFilepath, self.givenName, self.encoding  = sourceFilepath, givenName, encoding

        self.title = self.version = self.date = None
        self.XMLTree = self.header = self.frontMatter = self.divs = self.divTypesString = None
        #self.bkData, self.USFMBooks = OrderedDict(), OrderedDict()
        self.lang = self.language = None

        # Do a preliminary check on the readability of our files
        self.possibleFilenames = []
        if os.path.isdir( self.sourceFilepath ): # We've been given a folder -- see if we can find the files
            # There's no standard for OSIS xml file naming
            fileList = os.listdir( self.sourceFilepath )
            #print( len(fileList), fileList )
            # First try looking for OSIS book names
            for filename in fileList:
                if filename.lower().endswith('.txt'):
                    thisFilepath = os.path.join( self.sourceFilepath, filename )
                    #if BibleOrgSysGlobals.debugFlag: print( "Trying {}…".format( thisFilepath ) )
                    if os.access( thisFilepath, os.R_OK ): # we can read that file
                        self.possibleFilenames.append( filename )
        elif not os.access( self.sourceFilepath, os.R_OK ):
            logging.critical( "GreekNT: File {!r} is unreadable".format( self.sourceFilepath ) )
            return # No use continuing
        #print( self.possibleFilenames ); halt

        self.name = self.givenName
        #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML
        #gNTfc.loadMorphGNT()
        #self.books = gNTfc.bookData
    # end of __init__


    #def x__str__( self ):
        #"""
        #This method returns the string representation of a Bible book code.

        #@return: the name of a Bible object formatted as a string
        #@rtype: string
        #"""
        #result = "Greek Bible converter object"
        ##if self.title: result += ('\n' if result else '') + self.title
        ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version )
        ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date )
        #if len(self.books)==1:
            #for BBB in self.books: break # Just get the first one
            #result += ('\n' if result else '') + "  " + _("Contains one book: {}").format( BBB )
        #else: result += ('\n' if result else '') + "  " + _("Number of books = {}").format( len(self.books) )
        #return result
    ## end of __str__


    def loadBooks( self ):
        """
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "Loading Greek NT from {}…".format( self.sourceFilepath ) )
        for BBB in Greek.morphgntBooks:
            self.loadBook( BBB, Greek.morphgntFilenames[BBB] )
        if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} books loaded.".format( len(self.books) ) )
        #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book
            #for filename in self.possibleFilenames:
                #pathname = os.path.join( self.sourceFilepath, filename )
                #self.loadBook( pathname )
        #else: # most often we have all the Bible books in one file
            #self.loadFile( self.sourceFilepath )
        self.doPostLoadProcessing()
    # end of loadBooks

    def load( self ):
        self.loadBooks()


    def loadBook( self, BBB, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert len(bits) == 7
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert len(POSCode) == 2
            assert POSCode in Greek.POSCodes.keys()

            parsingCode = bits[2]
            assert len(parsingCode) == 8
            #print( parsingCode )
            for j,char in enumerate(parsingCode):
                assert char in Greek.parsingCodes[j]
            assert parsingCode[0] in Greek.personCodes
            assert parsingCode[1] in Greek.tenseCodes
            assert parsingCode[2] in Greek.voiceCodes
            assert parsingCode[3] in Greek.modeCodes
            assert parsingCode[4] in Greek.caseCodes
            assert parsingCode[5] in Greek.numberCodes
            assert parsingCode[6] in Greek.genderCodes
            assert parsingCode[7] in Greek.degreeCodes

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = 'Morph Greek NT Bible Book object'
        self.thisBook.objectTypeString = 'MorphGNT'
        filepath = os.path.join( self.sourceFilepath, filename )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Loading {}…".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.addLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.addLine( 'v', vn )
                        lastV = vn
                    self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            #if 0: #except:
                #logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                #if lineCount > 1: print( 'Previous line was: ', lastLine )
                #else: print( 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            if BibleOrgSysGlobals.verbosityLevel > 3: print( "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.stashBook( self.thisBook )
            #self.books[BBB] = self.thisBook
    # end of loadBook


    def analyzeWords( self ):
        """
        Go through the NT data and do some filing and sorting of the Greek words.

        Used by the interlinearizer app.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) )

        self.wordCounts = {} # Wordcount organised by BBB
        self.wordCounts['Total'] = 0
        self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {}
        for BBB in self.books:
            wordCount = len(self.books[BBB])
            self.wordCounts[BBB] = wordCount
            self.wordCounts['Total'] += wordCount
            if BibleOrgSysGlobals.verbosityLevel > 3: print( "  analyzeWords: {} has {} Greek words".format( BBB, wordCount ) )
            for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words
                # File the actual words
                if actualWord not in self.actualWordsToNormalized:
                    self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)]
                    #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] )
                else: # we've already had this word before
                    previous = self.actualWordsToNormalized[actualWord]
                    #print( "had", actualWord, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        #print( "  Found a new", normalizedWord, "normalized word for", actualWord, "was", previous )
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.actualWordsToNormalized[actualWord] = newList
                        #print( "  now have", newList )
                # File the normalized words
                if normalizedWord not in self.normalizedWordsToActual:
                    self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToActual[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldActualWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldActualWord )
                        if actualWord == oldActualWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldActualWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldActualWord,) )
                    if not found:
                        newList.append( ([reference],actualWord,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToActual[normalizedWord] = newList
                        #print( "  now have", newList )
                if normalizedWord not in self.normalizedWordsToParsing:
                    self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToParsing[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing )
                    found = changed = False
                    newList = []
                    for oldRefList,oldParsing in previous:
                        #print( "  oRL", oldRefList, "oP", oldParsing )
                        if parsing == oldParsing:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldParsing,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldParsing,) )
                    if not found:
                        newList.append( ([reference],parsing,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToParsing[normalizedWord] = newList
                        #print( "  now have", newList )
                # File the self.lemmasToNormalizedWords
                if lemma not in self.lemmasToNormalizedWords:
                    self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)]
                    #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] )
                else: # we've already had this word before
                    previous = self.lemmasToNormalizedWords[lemma]
                    #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.lemmasToNormalizedWords[lemma] = newList
                        #print( "  now have", newList )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                print( "  ", aW, self.actualWordsToNormalized[aW] )
                if j==6: break
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToActual.keys() ):
                print( "  ", nW, self.normalizedWordsToActual[nW] )
                if j==6: break
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToParsing.keys() ):
                print( "  ", nW, self.normalizedWordsToParsing[nW] )
                if j==6: break
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,lem in enumerate( self.lemmasToNormalizedWords.keys() ):
                print( "  ", lem, self.lemmasToNormalizedWords[lem] )
                if j==6: break
        if 0:
            print( "The following actual words have multiple normalized forms:" )
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                if len(self.actualWordsToNormalized[aW])>1:
                    print( "  ", aW )
                    for entry in self.actualWordsToNormalized[aW]:
                        print( "    ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )

예제 #23

파일 보기

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = metadataName = None
        lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search(
                        '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line)
                    if match: vplType = 1
                    else:
                        match = re.search('^(\\d{8})\\s', line)
                        if match: vplType = 2
                        else:
                            match = re.search('^# language_name:\\s', line)
                            if match: vplType = 3
                            #else:
                            #match = re.search( '^; TITLE:\\s', line )
                            #if match: vplType = 4
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print(
                                "First line got type #{} {!r} match from {!r}".
                                format(vplType, match.group(0), line))
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "VPLBible.load: (unexpected) first line was {!r} in {}"
                                .format(line, self.sourceFilepath))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue
                    #print( 'vplType', vplType )

                #print ( 'VPL file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if vplType == 3:
                    if line.startswith('# language_name:'):
                        string = line[16:].strip()
                        if string and string != 'Not available':
                            settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith('# closest ISO 639-3:'):
                        string = line[20:].strip()
                        if string and string != 'Not available':
                            settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith('# year_short:'):
                        string = line[13:].strip()
                        if string and string != 'Not available':
                            settingsDict['Year.short'] = string
                        continue
                    elif line.startswith('# year_long:'):
                        string = line[12:].strip()
                        if string and string != 'Not available':
                            settingsDict['Year.long'] = string
                        continue
                    elif line.startswith('# title:'):
                        string = line[8:].strip()
                        if string and string != 'Not available':
                            settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith('# URL:'):
                        string = line[6:].strip()
                        if string and string != 'Not available':
                            settingsDict['URL'] = string
                        continue
                    elif line.startswith('# copyright_short:'):
                        string = line[18:].strip()
                        if string and string != 'Not available':
                            settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith('# copyright_long:'):
                        string = line[17:].strip()
                        if string and string != 'Not available':
                            settingsDict['Copyright.long'] = string
                        continue
                    elif line[0] == '#':
                        logging.warning(
                            "VPLBible.load {} is skipping unknown line: {}".
                            format(vplType, line))
                        continue  # Just discard comment lines
                #elif vplType == 4:
                #if line.startswith( '; TITLE:' ):
                #string = line[8:].strip()
                #if string: settingsDict['TITLE'] = string
                #continue
                #elif line.startswith( '; ABBREVIATION:' ):
                #string = line[15:].strip()
                #if string: settingsDict['ABBREVIATION'] = string
                #continue
                #elif line.startswith( '; HAS ITALICS:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_ITALICS'] = string
                #continue
                #elif line.startswith( '; HAS FOOTNOTES:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_FOOTNOTES'] = string
                #continue
                #elif line.startswith( '; HAS FOOTNOTES' ):
                #string = line[14:].strip()
                #if string: settingsDict['HAS_FOOTNOTES'] = string
                #continue
                #elif line.startswith( '; HAS REDLETTER:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_REDLETTER'] = string
                #continue
                #elif line[0]==';':
                #logging.warning( "VPLBible.load{} is skipping unknown header/comment line: {}".format( vplType, line ) )
                #continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split(' ', 2)
                    #print( self.givenName, BBB, bits )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCodeText, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split(
                            ':')
                        #print( "{} {} bc={!r} c={!r} v={!r} txt={!r}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, vText ) )
                        if chapterNumberString == '':
                            chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL
                    else:
                        print("Unexpected number of bits", self.givenName, BBB,
                              bookCodeText, chapterNumberString,
                              verseNumberString, len(bits), bits)

                    if not bookCodeText and not chapterNumberString and not verseNumberString:
                        print("Skipping empty line in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCodeText,
                            chapterNumberString, verseNumberString))
                        continue
                    if BibleOrgSysGlobals.debugFlag:
                        assert 2 <= len(bookCodeText) <= 4
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error(
                            "Invalid verse number field at {}/{} {}:{!r}".
                            format(bookCodeText, BBB, chapterNumberString,
                                   verseNumberString))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int(chapterNumberString)
                    verseNumber = int(verseNumberString)

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        #if bookCodeText in ('Ge',): BBB = 'GEN'
                        if bookCodeText == 'Le' and lastBBB == 'GEN':
                            BBB = 'LEV'
                        elif bookCodeText in ('Jud', ) and lastBBB == 'JOS':
                            BBB = 'JDG'
                            #elif bookCodeText in ('Es',): BBB = 'EST'
                            #elif bookCodeText in ('Pr',): BBB = 'PRO'
                            #elif bookCodeText in ('So','SOL') and lastBBB == 'ECC': BBB = 'SNG'
                            #elif bookCodeText in ('La',) and lastBBB == 'JER': BBB = 'LAM'
                            #elif bookCodeText == 'PHI' and lastBBB == 'EPH': BBB = 'PHP'
                            #elif bookCodeText == 'PHI' and self.givenName == "bjp_vpl": BBB = 'PHP' # Hack for incomplete NT
                            #elif bookCodeText in ('Jude',): BBB = 'JDE'
                            #elif bookCodeText == 'PRA' and lastBBB == 'LJE': BBB = 'PAZ'
                            #elif bookCodeText == 'PRM' and lastBBB == 'GES': BBB = 'MAN'
                        else:
                            BBB = BOS66.getBBBFromText(
                                bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BOS81.getBBBFromText(
                                    bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BOSx.getBBBFromText(
                                    bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromText(
                                    bookCodeText)  # Try to guess
                        if not BBB:
                            logging.critical(
                                "VPL Bible: Unable to determine book code from text {!r} after {!r}={}"
                                .format(bookCodeText, lastBookCodeText,
                                        lastBBB))
                            halt

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0] == '«':
                        #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                        if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                            vBits = vText[1:].split('»')
                            #print( "vBits", vBits )
                            thisBook.addLine('d', vBits[0])  # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    #if verseNumber==lastVerseNumber and vText==lastVText:
                    #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #continue
                    if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                            '&lt;') and self.givenName == 'basic_english':
                        # Move Psalm titles to verse zero
                        verseNumber = 0
                    #if verseNumber < lastVerseNumber:
                    #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #elif verseNumber == lastVerseNumber:
                    #if vText == lastVText:
                    #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #else:
                    #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                elif vplType in (2, 3):
                    bits = line.split('\t', 1)
                    #print( self.givenName, BBB, bits )
                    bookNumberString, chapterNumberString, verseNumberString = bits[
                        0][:2], bits[0][2:5], bits[0][5:]
                    #print( bookNumberString, chapterNumberString, verseNumberString )
                    while len(chapterNumberString
                              ) > 1 and chapterNumberString[0] == '0':
                        chapterNumberString = chapterNumberString[
                            1:]  # Remove leading zeroes
                    while len(verseNumberString
                              ) > 1 and verseNumberString[0] == '0':
                        verseNumberString = verseNumberString[
                            1:]  # Remove leading zeroes
                    bookCodeText, chapterNumber, verseNumber = int(
                        bookNumberString), int(chapterNumberString), int(
                            verseNumberString)
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[') #.replace(' !','!')

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        bnDict = {
                            67: 'TOB',
                            68: 'JDT',
                            69: 'ESG',
                            70: 'WIS',
                            71: 'SIR',
                            72: 'BAR',
                            73: 'LJE',
                            74: 'PAZ',
                            75: 'SUS',
                            76: 'BEL',
                            77: 'MA1',
                            78: 'MA2',
                            79: 'MA3',
                            80: 'MA4',
                            81: 'ES1',
                            82: 'ES2',
                            83: 'MAN',
                            84: 'PS2',
                            85: 'PSS',
                            86: 'ODE',
                        }
                        if 1 <= bookCodeText <= 66:
                            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                                bookCodeText)
                        else:
                            BBB = bnDict[bookCodeText]

                #elif vplType == 4:
                #if line.startswith( '$$ ' ):
                #if metadataName and metadataContents:
                #settingsDict[metadataName] = metadataContents
                #metadataName = None
                #pointer = line[3:]
                ##print( "pointer", repr(pointer) )
                #if pointer and pointer[0]=='{' and pointer[-1]=='}':
                #metadataName = pointer[1:-1]
                #if metadataName:
                ##print( "metadataName", repr(metadataName) )
                #metadataContents = ''
                #else: # let's assume it's a BCV reference
                #pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                #.replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                #.replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                #.replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                #.replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                #.replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                #.replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                #B_CV_Bits = pointer.split( ' ', 1 )
                #if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                #bookCodeText, CVString = B_CV_Bits
                #chapterNumberString, verseNumberString = CVString.split( ':' )
                #chapterNumber = int( chapterNumberString )
                #verseNumber = int( verseNumberString )
                #if bookCodeText != lastBookCodeText: # We've started a new book
                #if bookCodeText in ('Ge',): BBB = 'GEN'
                #elif bookCodeText in ('Le',): BBB = 'LEV'
                #elif bookCodeText in ('La',): BBB = 'LAM'
                #else:
                ##print( "4bookCodeText =", repr(bookCodeText) )
                ##BBB = BOS.getBBBFromText( bookCodeText )  # Try to guess
                #BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                #if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                #if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                ##print( "4BBB =", repr(BBB) )
                #else: print( "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )
                #continue # Just save the pointer information which refers to the text on the next line
                #else: # it's not a $$ line
                #text = line
                ##print( "text", repr(text) )
                #if metadataName:
                #metadataContents += ('\n' if metadataContents else '') + text
                #continue
                #else:
                #vText = text
                ## Handle bits like (<scripref>Pr 2:7</scripref>)
                #vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                #vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                ##if '\\' in vText: print( 'VPL vText', repr(vText) )
                #if vplType == 4: # Forge for SwordSearcher
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                ## Convert {stuff} to footnotes
                #match = re.search( '\\{(.+?)\\}', vText )
                #while match:
                #footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                #vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                #match = re.search( '\\{(.+?)\\}', vText )
                ## Convert [stuff] to added fields
                #match = re.search( '\\[(.+?)\\]', vText )
                #while match:
                #addText = '\\add {}\\add*'.format( match.group(1) )
                #vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                #match = re.search( '\\[(.+?)\\]', vText )
                #for badChar in '{}[]':
                #if badChar in vText:
                #logging.warning( "Found remaining braces or brackets in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                #break

                else:
                    logging.critical('Unknown VPL type {}'.format(vplType))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt

                if bookCodeText:
                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        if lastBookCodeText is not None:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCodeText = bookCodeText
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "VPLBible{} could not figure out {!r} book code"
                                .format(vplType, bookCodeText))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCodeText,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCodeText,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCodeText,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCodeText yet
                    logging.warning(
                        "VPLBible.load{} is skipping unknown pre-book line: {}"
                        .format(vplType, line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #print( "VPL settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()

예제 #24

파일 보기

파일: CSVBible.py 프로젝트: beniza/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      CSVBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if line==' ': continue # Handle special case which has blanks on every second line -- HACK
                lastLine = line
                #print ( "CSV file line {} is {}".format( lineCount, repr(line) ) )
                if line[0]=='#': continue # Just discard comment lines
                if lineCount==1:
                    if line.startswith( '"Book",' ):
                        quoted = True
                        continue # Just discard header line
                    elif line.startswith( 'Book,' ):
                        quoted = False
                        continue # Just discard header line

                bits = line.split( ',', 3 )
                #print( lineCount, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bString, chapterNumberString, verseNumberString, vText = bits
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )
                else: print( "Unexpected number of bits", self.givenName, BBB, bString, chapterNumberString, verseNumberString, vText, len(bits), bits )

                # Remove quote marks from these strings
                if quoted:
                    if len(bString)>=2 and bString[0]==bString[-1] and bString[0] in '"\'': bString = bString[1:-1]
                    if len(chapterNumberString)>=2 and chapterNumberString[0]==chapterNumberString[-1] and chapterNumberString[0] in '"\'': chapterNumberString = chapterNumberString[1:-1]
                    if len(verseNumberString)>=2 and verseNumberString[0]==verseNumberString[-1] and verseNumberString[0] in '"\'': verseNumberString = verseNumberString[1:-1]
                    if len(vText)>=2 and vText[0]==vText[-1] and vText[0] in '"\'': vText = vText[1:-1]
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )

                #if not bookCode and not chapterNumberString and not verseNumberString:
                    #print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #continue
                #if Globals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                #if Globals.debugFlag: assert( chapterNumberString.isdigit() )
                #if Globals.debugFlag: assert( verseNumberString.isdigit() )
                bookNumber = int( bString )
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookNumber != lastBookNumber: # We've started a new book
                    if lastBookNumber != -1: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )  # Try to guess
                    assert( BBB )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                vTextOriginal = vText
                # First do special characters
                vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
                vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
                # Now do Unicode characters
                while True: # Find patterns like \\'d3
                    match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                while True: # Find patterns like \\u253?
                    match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                #if vText != vTextOriginal: print( repr(vTextOriginal) ); print( repr(vText) )

                ## Handle special formatting
                ##   [brackets] are for Italicized words
                ##   <brackets> are for the Words of Christ in Red
                ##   «brackets»  are for the Titles in the Book  of Psalms.
                #vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    #.replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                #if vText and vText[0]=='«':
                    #assert( BBB=='PSA' and verseNumberString=='1' )
                    #vBits = vText[1:].split( '»' )
                    ##print( "vBits", vBits )
                    #thisBook.appendLine( 'd', vBits[0] ) # Psalm title
                    #vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()

예제 #25

파일 보기

    def load(self):
        """
        Load the compressed data file and import book elements.
        """
        import zlib
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(_("\nLoading {}…").format(self.sourceFilepath))
        with open(self.sourceFilepath,
                  'rb') as myFile:  # Automatically closes the file when done
            fileBytes = myFile.read()
        if BibleOrgSysGlobals.debugFlag:
            print("  {:,} bytes read".format(len(fileBytes)))

        keep = {}
        index = 0
        #print( 'block1', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] )
        keep['block1'] = fileBytes[index:index + 32]
        hString = ''
        for j in range(0, 32):
            char8 = fileBytes[index + j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            hString += chr(char8)
        if BibleOrgSysGlobals.debugFlag:
            print('block1b', hexlify(fileBytes[index + j:index + 32]))
        # Skipped some (important?) binary here
        index += 32
        if BibleOrgSysGlobals.debugFlag: print('hString', repr(hString), index)
        assert hString == 'EasyWorship Bible Text'

        #print( 'block2', hexlify( fileBytes[index:index+56] ), fileBytes[index:index+56] )
        keep['block2'] = fileBytes[index:index + 56]
        nString = ''
        for j in range(0, 32):
            char8 = fileBytes[index + j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            nString += chr(char8)
        # Skipped some zeroes here
        index += 56
        if BibleOrgSysGlobals.debugFlag: print('nString', repr(nString), index)
        self.name = nString

        rawBooks = []
        for b in range(1, 66 + 1):
            bookAbbrev = ''
            for j in range(0, 32):
                char8 = fileBytes[index + j]
                #print( char8, repr(char8) )
                if char8 < 0x20: break
                bookAbbrev += chr(char8)
            # Skipped some zeroes here
            index += 51
            if bookAbbrev and bookAbbrev[-1] == '.':
                bookAbbrev = bookAbbrev[:-1]  # Remove final period
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print('bookAbbrev', repr(bookAbbrev))
            numChapters = fileBytes[index]
            numVerses = []
            for j in range(0, numChapters):
                numVerses.append(fileBytes[index + j + 1])
            # Skipped some zeroes here
            index += 157
            if BibleOrgSysGlobals.debugFlag:
                print(' ', numChapters, numVerses)
            bookStart, = struct.unpack("<I", fileBytes[index:index + 4])
            # Skipped some zeroes here
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print('  bookStart', bookStart)
            bookLength, = struct.unpack("<I", fileBytes[index:index + 4])
            # Skipped some zeroes here
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print('  bookLength', bookLength, bookStart + bookLength)
            bookBytes = fileBytes[bookStart:bookStart + bookLength]
            assert bookBytes[
                0] == 0x78 and bookBytes[1] == 0xda  # Zlib compression header
            rawBooks.append((bookAbbrev, numChapters, numVerses, bookStart,
                             bookLength, bookBytes))

        if BibleOrgSysGlobals.debugFlag:
            print('unknown block3', index,
                  hexlify(fileBytes[index:index + 30]))
        keep['block3'] = fileBytes[index:index + 30]
        length3, = struct.unpack("<I", fileBytes[index:index + 4])
        if length3:
            block3 = fileBytes[index + 4:index + 4 + length3 - 4]
            byteResult = zlib.decompress(block3)
            textResult = byteResult.decode('utf8')
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print("Got", len(textResult), textResult, 'from', length3)
            keep['block3n'] = textResult
            if self.name:
                print('Overwriting module name {!r} with {!r}'.format(
                    self.name, textResult))
            self.name = textResult
        index += length3
        if BibleOrgSysGlobals.debugFlag:
            print('end of contents', index,
                  hexlify(fileBytes[index:index + 60]))
        keep['block4'] = rawBooks[0][3]

        block5 = fileBytes[index:rawBooks[0][3]]
        keep['block5'] = block5
        index += len(block5)
        #if self.abbreviation in ( 'TB', ): # Why don't the others work
        assert index == rawBooks[0][
            3]  # Should now be at the start of the first book (already fetched above)

        assert len(rawBooks) == 66
        # Look at extra stuff at end
        endBytes = fileBytes[bookStart + bookLength:]
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print('endBytes', len(endBytes), hexlify(endBytes), endBytes)
        assert len(endBytes) == 16
        keep['block9'] = endBytes
        # Skipped some binary and some text here
        del fileBytes

        # Now we have to decode the book text (compressed about 4x with zlib)
        for j, BBB in enumerate(BOS.getBookList()):
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print('  Decoding {}…'.format(BBB))
            bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[
                j]
            byteResult = zlib.decompress(bookBytes)
            textResult = byteResult.decode('utf8')
            if '\t' in textResult:
                logging.warning("Replacing tab characters in {} = {}".format(
                    BBB, bookAbbrev))
                textResult = textResult.replace('\t', ' ')
            #print( textResult )
            if BibleOrgSysGlobals.strictCheckingFlag:
                assert '  ' not in textResult

            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine('toc3', bookAbbrev)

            C, V = '0', '-1'  # So id line starts at 0:0
            for line in textResult.split('\r\n'):
                if not line: continue  # skip blank lines
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print('Processing {} {} line: {!r}'.format(
                        self.abbreviation, BBB, line))
                assert line[0].isdigit()
                assert ':' in line[:4]
                CV, verseText = line.split(' ', 1)
                newC, newV = CV.split(':')
                #print( newC, V, repr(verseText) )
                if newC != C:
                    if self.abbreviation == 'hcsb' and BBB in (
                            'SA2',
                    ):  # Handle a bad bug -- chapter 24 has verses out of order
                        print(
                            "Skipping error for out-of-order chapters in {}!".
                            format(BBB))
                    else:
                        assert int(newC) > int(C)
                    C, V = newC, '0'
                    thisBook.addLine('c', C)
                if self.abbreviation == 'TB' and BBB == 'JOL':  # Handle a bug -- chapter 3 repeats
                    if int(newV) < int(V): break
                elif self.abbreviation == 'rsv' and BBB in (
                        'EXO',
                        'HAG',
                ):  # Handle a bug -- chapter 22 has verses out of order
                    print("Skipping error for out-of-order verses in {} {}".
                          format(self.abbreviation, BBB))
                elif self.abbreviation == 'gnt' and BBB in (
                        'ISA',
                        'ZEC',
                        'MRK',
                ):  # Handle a bug -- chapter 38 has verses out of order
                    print("Skipping error for out-of-order verses in {} {}".
                          format(self.abbreviation, BBB))
                elif self.abbreviation == 'hcsb' and BBB in (
                        'SA2',
                ):  # Handle a bug -- chapter 24 has verses out of order
                    print("Skipping error for out-of-order verses in {} {}".
                          format(self.abbreviation, BBB))
                elif self.abbreviation == 'msg' and BBB in (
                        'NUM',
                        'JDG',
                        'SA2',
                        'CH2',
                        'EZE',
                        'ACT',
                ):  # Handle a bug -- chapter 24 has verses out of order
                    print("Skipping error for out-of-order verses in {} {}".
                          format(self.abbreviation, BBB))
                else:
                    try:
                        assert int(newV) > int(V)
                    except ValueError:
                        if BibleOrgSysGlobals.debugFlag:
                            print(
                                "Something's not an integer around {} {}:{} {}"
                                .format(BBB, C, V, verseText))
                V = newV
                thisBook.addLine('v', V + ' ' + verseText)

            if BibleOrgSysGlobals.verbosityLevel > 3: print("Saving", BBB)
            self.stashBook(thisBook)

        self.doPostLoadProcessing()
        return keep

예제 #26

파일 보기

파일: UnboundBible.py 프로젝트: alerque/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      UnboundBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'UB file line is "' + line + '"' )
                if line[0]=='#':
                    hashBits = line[1:].split( '\t' )
                    if len(hashBits)==2 and hashBits[1]: # We have some valid meta-data
                        if hashBits[0] == 'name': self.name = hashBits[1]
                        elif hashBits[0] == 'filetype': self.filetype = hashBits[1]
                        elif hashBits[0] == 'copyright': self.copyright = hashBits[1]
                        elif hashBits[0] == 'abbreviation': self.abbreviation = hashBits[1]
                        elif hashBits[0] == 'language': self.language = hashBits[1]
                        elif hashBits[0] == 'note': self.note = hashBits[1]
                        elif hashBits[0] == 'columns': self.columns = hashBits[1]
# Should some of these be placed into self.settingsDict???
                        logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( hashBits[0], hashBits[1] ) )
                    continue # Just discard comment lines

                bits = line.split( '\t' )
                #print( self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith( 'lxx_a_parsing_' ):
                    logging.warning( _("Skipping bad {!r} line in {} {} {} {}:{}").format( line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits ); halt

                if NRSVA_bookCode: assert( len(NRSVA_bookCode) == 3 )
                if NRSVA_chapterNumberString: assert( NRSVA_chapterNumberString.isdigit() )
                if NRSVA_verseNumberString: assert( NRSVA_verseNumberString.isdigit() )

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert( len(bookCode) == 3 )
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
                if BibleOrgSysGlobals.debugFlag: assert( verseNumberString.isdigit() )

                if subverseNumberString:
                    logging.warning( _("subverseNumberString {!r} in {} {} {}:{}").format( subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString ) )

                vText = vText.strip() # Remove leading and trailing spaces
                if not vText: continue # Just ignore blank verses I think
                if vText == '+': continue # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag: assert( sequenceNumberString.isdigit() )
                    sequenceNumber = int( sequenceNumberString )
                    if BibleOrgSysGlobals.debugFlag: assert( sequenceNumber > lastSequence or \
                        self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) ) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode: # We've started a new book
                    if lastBookCode != -1: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode( bookCode )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'Unbound Bible Book object'
                    thisBook.objectTypeString = 'Unbound'
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()

예제 #27

파일 보기

파일: USFXXMLBible.py 프로젝트: beniza/BibleOrgSys

    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if Globals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        Globals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, C )
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )

예제 #28

파일 보기

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        loadErrors = []

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


        # Just get some information from the file
        cursor.execute( 'select * from Bible' )
        rows = cursor.fetchall()
        numRows = len(rows)
        if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
        BBBn1 = rows[0][0]
        if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
        del rows
        BBB1 = None
        if BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )


        testament = BBB = None
        booksExpected = textLineCountExpected = 0
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957
        elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['Apocrypha']: # incomplete
            testament, BBB = 'AP', 'XXX'
            booksExpected, textLineCountExpected = 99, 999999
            halt
        if not BBB:
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            if 0:
                cursor.execute( 'select * from Bible' )
                rows = cursor.fetchall()
                print( "rows", len(rows) )
                for row in rows:
                    assert( len(row) == 4 )
                    BBBn, C, V, text = row # First three are integers, the last is a string
                    print( BBBn, C, V, repr(text) )
                    if C==2: break
                del rows # Takes a lot of memory
        if Globals.debugFlag or Globals.verbosityLevel>2:
            print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
        if BBB1 != BBB:
            logging.critical( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            loadErrors.append( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            if not BBB: BBB = BBB1
        if numRows != textLineCountExpected:
            logging.critical( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
            loadErrors.append( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
        #halt

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "e-Sword Bible Book object"
        thisBook.objectTypeString = "e-Sword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except: # This reference is missing
                #print( "something wrong at", BBB, C, V )
                #if Globals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'e-Sw file line is "' + line + '"' )
            if line is None: logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                        break
                elif not line: logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        if Globals.debugFlag:
                            logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                        #print( repr(line) )
                    while line and line[-1] in '\r\n': line = line[:-1] # Remove CR/LFs from the end
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

            #print( "e-Sword.load", BBB, C, V, repr(line) )
            self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self.name, BBB )
                    thisBook.objectNameString = "e-Sword Bible Book object"
                    thisBook.objectTypeString = "e-Sword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.appendLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.appendLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.appendLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        if Globals.strictCheckingFlag or Globals.debugFlag: self.checkForExtraMaterial( cursor, BOS )
        cursor.close()
        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        self.doPostLoadProcessing()

예제 #29

파일 보기

파일: GreekNT.py 프로젝트: dimleyk/BibleOrgSys

class GreekNT( Bible ):
    """
    Class for handling a Greek NT object (which may contain one or more Bible books)

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ):
        """
        Constructor: expects the filepath of the source folder.
        Loads (and crudely validates the file(s)) into ???.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "Greek NT Bible object"
        self.objectTypeString = "GreekNT"

        # Now we can set our object variables
        self.sourceFilepath, self.givenName, self.encoding  = sourceFilepath, givenName, encoding

        self.title = self.version = self.date = None
        self.tree = self.header = self.frontMatter = self.divs = self.divTypesString = None
        #self.bkData, self.USFMBooks = OrderedDict(), OrderedDict()
        self.lang = self.language = None

        # Do a preliminary check on the readability of our files
        self.possibleFilenames = []
        if os.path.isdir( self.sourceFilepath ): # We've been given a folder -- see if we can find the files
            # There's no standard for OSIS xml file naming
            fileList = os.listdir( self.sourceFilepath )
            #print( len(fileList), fileList )
            # First try looking for OSIS book names
            for filename in fileList:
                if filename.lower().endswith('.txt'):
                    thisFilepath = os.path.join( self.sourceFilepath, filename )
                    #if Globals.debugFlag: print( "Trying {}...".format( thisFilepath ) )
                    if os.access( thisFilepath, os.R_OK ): # we can read that file
                        self.possibleFilenames.append( filename )
        elif not os.access( self.sourceFilepath, os.R_OK ):
            logging.critical( "GreekNT: File '{}' is unreadable".format( self.sourceFilepath ) )
            return # No use continuing
        #print( self.possibleFilenames ); halt

        self.name = self.givenName
        #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML
        #gNTfc.loadMorphGNT()
        #self.books = gNTfc.bookData
    # end of __init__


    #def x__str__( self ):
        #"""
        #This method returns the string representation of a Bible book code.

        #@return: the name of a Bible object formatted as a string
        #@rtype: string
        #"""
        #result = "Greek Bible converter object"
        ##if self.title: result += ('\n' if result else '') + self.title
        ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version )
        ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date )
        #if len(self.books)==1:
            #for BBB in self.books: break # Just get the first one
            #result += ('\n' if result else '') + "  " + _("Contains one book: {}").format( BBB )
        #else: result += ('\n' if result else '') + "  " + _("Number of books = {}").format( len(self.books) )
        #return result
    ## end of __str__


    def load( self ):
        if Globals.verbosityLevel > 2: print( "Loading Greek NT from {}...".format( self.sourceFilepath ) )
        for BBB in Greek.morphgntBooks:
            self.loadBook( BBB, Greek.morphgntFilenames[BBB] )
        if Globals.verbosityLevel > 3: print( "{} books loaded.".format( len(self.books) ) )
        #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book
            #for filename in self.possibleFilenames:
                #pathname = os.path.join( self.sourceFilepath, filename )
                #self.loadBook( pathname )
        #else: # most often we have all the Bible books in one file
            #self.loadFile( self.sourceFilepath )
        self.doPostLoadProcessing()
    # end of load


    def loadBook( self, BBB, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert( len(bits) == 7 )
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert( len(POSCode) == 2 )
            assert( POSCode in Greek.POSCodes.keys() )

            parsingCode = bits[2]
            assert( len(parsingCode) == 8 )
            #print( parsingCode )
            for j,char in enumerate(parsingCode):
                assert( char in Greek.parsingCodes[j] )
            assert( parsingCode[0] in Greek.personCodes )
            assert( parsingCode[1] in Greek.tenseCodes )
            assert( parsingCode[2] in Greek.voiceCodes )
            assert( parsingCode[3] in Greek.modeCodes )
            assert( parsingCode[4] in Greek.caseCodes )
            assert( parsingCode[5] in Greek.numberCodes )
            assert( parsingCode[6] in Greek.genderCodes )
            assert( parsingCode[7] in Greek.degreeCodes )

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self.name, BBB )
        self.thisBook.objectNameString = "Morph Greek NT Bible Book object"
        self.thisBook.objectTypeString = "MorphGNT"
        filepath = os.path.join( self.sourceFilepath, filename )
        if Globals.verbosityLevel > 2: print( "  Loading {}...".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected UTF-16 Byte Order Marker in {}".format( filename ) )
                        line = line[1:] # Remove the UTF-8 Byte Order Marker
                    if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.appendLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.appendLine( 'v', vn )
                        lastV = vn
                    self.thisBook.appendLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.appendLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            if 0: #except:
                logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                if lineCount > 1: print( 'Previous line was: ', lastLine )
                else: print( 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            if Globals.verbosityLevel > 3: print( "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.saveBook( self.thisBook )
            #self.books[BBB] = self.thisBook
    # end of loadBook


    def xanalyzeWords( self ):
        """ Go through the NT data and do some filing and sorting of the Greek words. """
        if Globals.verbosityLevel > 3: print( "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) )
        self.wordCounts = {} # Wordcount organized by BBB
        self.wordCounts['Total'] = 0
        self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {}
        for BBB in self.books:
            wordCount = len(self.books[BBB])
            self.wordCounts[BBB] = wordCount
            self.wordCounts['Total'] += wordCount
            if Globals.verbosityLevel > 3: print( "  analyzeWords: {} has {} Greek words".format( BBB, wordCount ) )
            for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words
                # File the actual words
                if actualWord not in self.actualWordsToNormalized:
                    self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)]
                    #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] )
                else: # we've already had this word before
                    previous = self.actualWordsToNormalized[actualWord]
                    #print( "had", actualWord, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        #print( "  Found a new", normalizedWord, "normalized word for", actualWord, "was", previous )
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.actualWordsToNormalized[actualWord] = newList
                        #print( "  now have", newList )
                # File the normalized words
                if normalizedWord not in self.normalizedWordsToActual:
                    self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToActual[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldActualWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldActualWord )
                        if actualWord == oldActualWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldActualWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldActualWord,) )
                    if not found:
                        newList.append( ([reference],actualWord,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToActual[normalizedWord] = newList
                        #print( "  now have", newList )
                if normalizedWord not in self.normalizedWordsToParsing:
                    self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToParsing[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing )
                    found = changed = False
                    newList = []
                    for oldRefList,oldParsing in previous:
                        #print( "  oRL", oldRefList, "oP", oldParsing )
                        if parsing == oldParsing:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldParsing,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldParsing,) )
                    if not found:
                        newList.append( ([reference],parsing,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToParsing[normalizedWord] = newList
                        #print( "  now have", newList )
                # File the self.lemmasToNormalizedWords
                if lemma not in self.lemmasToNormalizedWords:
                    self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)]
                    #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] )
                else: # we've already had this word before
                    previous = self.lemmasToNormalizedWords[lemma]
                    #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.lemmasToNormalizedWords[lemma] = newList
                        #print( "  now have", newList )
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) )
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) )
        if Globals.verbosityLevel > 3:
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                print( "  ", aW, self.actualWordsToNormalized[aW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) )
        if Globals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToActual.keys() ):
                print( "  ", nW, self.normalizedWordsToActual[nW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) )
        if Globals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToParsing.keys() ):
                print( "  ", nW, self.normalizedWordsToParsing[nW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) )
        if Globals.verbosityLevel > 3:
            for j,lem in enumerate( self.lemmasToNormalizedWords.keys() ):
                print( "  ", lem, self.lemmasToNormalizedWords[lem] )
                if j==6: break
        if 0:
            print( "The following actual words have multiple normalized forms:" )
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                if len(self.actualWordsToNormalized[aW])>1:
                    print( "  ", aW )
                    for entry in self.actualWordsToNormalized[aW]:
                        print( "    ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )

예제 #30

파일 보기

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        vplType = bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                    if match: vplType = 1
                    else:
                        match = re.search( '^(\\d{8})\\s', line )
                        if match: vplType = 2
                        else:
                            match = re.search( '^# language_name:\\s', line )
                            if match: vplType = 3
                            #else:
                                #match = re.search( '^; TITLE:\\s', line )
                                #if match: vplType = 4
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue
                    #print( 'vplType', vplType )

                #print ( 'VPL file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if vplType == 3:
                    if   line.startswith( '# language_name:' ):
                        string = line[16:].strip()
                        if string and string != 'Not available': settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith( '# closest ISO 639-3:' ):
                        string = line[20:].strip()
                        if string and string != 'Not available': settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith( '# year_short:' ):
                        string = line[13:].strip()
                        if string and string != 'Not available': settingsDict['Year.short'] = string
                        continue
                    elif line.startswith( '# year_long:' ):
                        string = line[12:].strip()
                        if string and string != 'Not available': settingsDict['Year.long'] = string
                        continue
                    elif line.startswith( '# title:' ):
                        string = line[8:].strip()
                        if string and string != 'Not available': settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith( '# URL:' ):
                        string = line[6:].strip()
                        if string and string != 'Not available': settingsDict['URL'] = string
                        continue
                    elif line.startswith( '# copyright_short:' ):
                        string = line[18:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith( '# copyright_long:' ):
                        string = line[17:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.long'] = string
                        continue
                    elif line[0]=='#':
                        logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                        continue # Just discard comment lines
                #elif vplType == 4:
                    #if line.startswith( '; TITLE:' ):
                        #string = line[8:].strip()
                        #if string: settingsDict['TITLE'] = string
                        #continue
                    #elif line.startswith( '; ABBREVIATION:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['ABBREVIATION'] = string
                        #continue
                    #elif line.startswith( '; HAS ITALICS:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_ITALICS'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES' ):
                        #string = line[14:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS REDLETTER:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_REDLETTER'] = string
                        #continue
                    #elif line[0]==';':
                        #logging.warning( "VPLBible.load{} is skipping unknown header/comment line: {}".format( vplType, line ) )
                        #continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split( ' ', 2 )
                    #print( self.givenName, BBB, bits )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCode, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                    else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )

                    if not bookCode and not chapterNumberString and not verseNumberString:
                        print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        continue
                    if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCode) <= 4
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int( chapterNumberString )
                    verseNumber = int( verseNumberString )

                    if bookCode != lastBookCode: # We've started a new book
                        #if bookCode in ('Ge',): BBB = 'GEN'
                        if bookCode in ('Le',): BBB = 'LEV'
                        elif bookCode in ('Jud',): BBB = 'JDG'
                        #elif bookCode in ('Es',): BBB = 'EST'
                        #elif bookCode in ('Pr',): BBB = 'PRO'
                        elif bookCode in ('So',): BBB = 'SNG'
                        elif bookCode in ('La',): BBB = 'LAM'
                        #elif bookCode in ('Jude',): BBB = 'JDE'
                        else:
                            #BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromText( bookCode )  # Try to guess
                            BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0]=='«':
                        #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                        if BBB=='PSA' and verseNumberString=='1': # Psalm title
                            vBits = vText[1:].split( '»' )
                            #print( "vBits", vBits )
                            thisBook.addLine( 'd', vBits[0] ) # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    #if verseNumber==lastVerseNumber and vText==lastVText:
                        #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        #continue
                    if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                        # Move Psalm titles to verse zero
                        verseNumber = 0
                    #if verseNumber < lastVerseNumber:
                        #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #elif verseNumber == lastVerseNumber:
                        #if vText == lastVText:
                            #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        #else:
                            #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                elif vplType in (2,3):
                    bits = line.split( '\t', 1 )
                    #print( self.givenName, BBB, bits )
                    bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                    #print( bookNumberString, chapterNumberString, verseNumberString )
                    while len(chapterNumberString)>1 and chapterNumberString[0]=='0':
                        chapterNumberString = chapterNumberString[1:] # Remove leading zeroes
                    while len(verseNumberString)>1 and verseNumberString[0]=='0':
                        verseNumberString = verseNumberString[1:] # Remove leading zeroes
                    bookCode, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[') #.replace(' !','!')

                    if bookCode != lastBookCode: # We've started a new book
                        bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                                85:'PSS', 86:'ODE', }
                        if 1 <= bookCode <= 66: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookCode )
                        else: BBB = bnDict[bookCode]

                #elif vplType == 4:
                    #if line.startswith( '$$ ' ):
                        #if metadataName and metadataContents:
                            #settingsDict[metadataName] = metadataContents
                            #metadataName = None
                        #pointer = line[3:]
                        ##print( "pointer", repr(pointer) )
                        #if pointer and pointer[0]=='{' and pointer[-1]=='}':
                            #metadataName = pointer[1:-1]
                            #if metadataName:
                                ##print( "metadataName", repr(metadataName) )
                                #metadataContents = ''
                        #else: # let's assume it's a BCV reference
                            #pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                            #.replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                            #.replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                            #.replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                            #.replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                            #.replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                            #.replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                            #B_CV_Bits = pointer.split( ' ', 1 )
                            #if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                                #bookCode, CVString = B_CV_Bits
                                #chapterNumberString, verseNumberString = CVString.split( ':' )
                                #chapterNumber = int( chapterNumberString )
                                #verseNumber = int( verseNumberString )
                                #if bookCode != lastBookCode: # We've started a new book
                                    #if bookCode in ('Ge',): BBB = 'GEN'
                                    #elif bookCode in ('Le',): BBB = 'LEV'
                                    #elif bookCode in ('La',): BBB = 'LAM'
                                    #else:
                                        ##print( "4BookCode =", repr(bookCode) )
                                        ##BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                        #BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                                        #if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                                        #if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess
                                        ##print( "4BBB =", repr(BBB) )
                            #else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                        #continue # Just save the pointer information which refers to the text on the next line
                    #else: # it's not a $$ line
                        #text = line
                        ##print( "text", repr(text) )
                        #if metadataName:
                            #metadataContents += ('\n' if metadataContents else '') + text
                            #continue
                        #else:
                            #vText = text
                            ## Handle bits like (<scripref>Pr 2:7</scripref>)
                            #vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                            #vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                            ##if '\\' in vText: print( 'VPL vText', repr(vText) )
                            #if vplType == 4: # Forge for SwordSearcher
                                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                                ## Convert {stuff} to footnotes
                                #match = re.search( '\\{(.+?)\\}', vText )
                                #while match:
                                    #footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                                    #vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                                    ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\{(.+?)\\}', vText )
                                ## Convert [stuff] to added fields
                                #match = re.search( '\\[(.+?)\\]', vText )
                                #while match:
                                    #addText = '\\add {}\\add*'.format( match.group(1) )
                                    #vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                                    ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\[(.+?)\\]', vText )
                                #for badChar in '{}[]':
                                    #if badChar in vText:
                                        #logging.warning( "Found remaining braces or brackets in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                                        #break

                else:
                    logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

                if bookCode:
                    if bookCode != lastBookCode: # We've started a new book
                        if lastBookCode != -1: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCode ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCode yet
                    logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            #print( "VPL settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()

예제 #31

파일 보기

파일: VPLBible.py 프로젝트: alerque/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    logging.info( "      VPLBible.load: Detected UTF-16 Byte Order Marker" )
                    line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'VLP file line is "' + line + '"' )
                if line[0]=='#': continue # Just discard comment lines

                bits = line.split( ' ', 2 )
                #print( self.givenName, BBB, bits )
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split( ':' )
                else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
                if not verseNumberString.isdigit():
                    logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert( verseNumberString.isdigit() )
                    continue
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookCode != lastBookCode: # We've started a new book
                    if lastBookCode != -1: # Better save the last book
                        self.saveBook( thisBook )
                    #if bookCode in ('Ge',): BBB = 'GEN'
                    #elif bookCode in ('Le',): BBB = 'LEV'
                    ##elif bookCode in ('Jud',): BBB = 'JDG'
                    #elif bookCode in ('Es',): BBB = 'EST'
                    #elif bookCode in ('Pr',): BBB = 'PRO'
                    #elif bookCode in ('So',): BBB = 'SNG'
                    #elif bookCode in ('La',): BBB = 'LAM'
                    #elif bookCode in ('Jude',): BBB = 'JDE'
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB( bookCode )  # Try to guess
                    if BBB:
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = "VPL Bible Book object"
                        thisBook.objectTypeString = "VPL"
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical( "VPLBible could not figure out {!r} book code".format( bookCode ) )
                        if BibleOrgSysGlobals.debugFlag: halt

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle special formatting
                #   [brackets] are for Italicized words
                #   <brackets> are for the Words of Christ in Red
                #   «brackets»  are for the Titles in the Book  of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0]=='«':
                    #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                    if BBB=='PSA' and verseNumberString=='1': # Psalm title
                        vBits = vText[1:].split( '»' )
                        #print( "vBits", vBits )
                        thisBook.addLine( 'd', vBits[0] ) # Psalm title
                        vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()

예제 #32

파일 보기

파일: YETBible.py 프로젝트: dimleyk/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        def decodeVerse( encodedVerseString ):
            """
            Decodes the verse which has @ format codes.
            """
            verseString = encodedVerseString
            if verseString.startswith( '@@' ): # This simply means that encoding follows
                verseString = verseString[2:]
            if verseString.startswith( '@@' ): # This simply means that encoding follows
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace( '@^', '\\\\p ' )
            verseString = verseString.replace( '@0', '\\\\m ' )
            verseString = verseString.replace( '@1', '\\\\q1 ' ).replace( '@2', '\\\\q2 ' ).replace( '@3', '\\\\q3 ' ).replace( '@4', '\\q4 ' )
            verseString = verseString.replace( '@8', '\\\\m ' )
            # Character markers (marked now with single backslash)
            verseString = verseString.replace( '@6', '\\wj ' ).replace( '@5', '\\wj*' )
            verseString = verseString.replace( '@9', '\\add ' ).replace( '@7', '\\add*' ) # or \\i ???
            verseString = re.sub( r'@<f([0-9])@>@/', r'\\ff\1', verseString )
            verseString = re.sub( r'@<x([0-9])@>@/', r'\\xx\1', verseString )
            #print( repr( verseString ) )
            assert( '@' not in verseString )
            return verseString
        # end of decodeVerse

        # Read all the lines into bookDict
        lastLine, lineCount = '', 0
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(), OrderedDict(), {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = ''
        lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      YETBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'YETBible file line is "' + line + '"' )

                bits = line.split( '\t' )
                #print( self.givenName, BBB, bits )
                if bits[0] == 'info':
                    assert( len(bits) == 3 )
                    if bits[1] == 'shortName':
                        shortName = bits[2]
                        self.name = shortName
                    elif bits[1] == 'longName':
                        longName = bits[2]
                    elif bits[1] == 'description':
                        description = bits[2]
                    elif bits[1] == 'locale':
                        locale = bits[2]
                        assert( 2 <= len(locale) <= 3 )
                        if locale == 'in': locale = 'id' # Fix a quirk in the locale encoding
                    else:
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                elif bits[0] == 'book_name':
                    assert( 3 <= len(bits) <= 4 )
                    thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bits[1] )
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                    continue
                elif bits[0] == 'verse':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    #print( "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) )
                    if BBB != lastBBB: # We have a new book
                        if lastBBB is not None: # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert( BBB in bookNameDict )
                        bookLines = OrderedDict() # Keys are (C,V) strings
                    verseString = decodeVerse( encodedVerseString )
                    bookLines[(chapterNumberString,verseNumberString)] = verseString # Just store it for now
                    lastBBB = BBB
                    continue
                elif bits[0] == 'pericope':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    headingString = encodedHeadingString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    #print( repr(encodedHeadingString), repr(headingString) )
                    assert( '@' not in headingString )
                    headingDict[(BBB,chapterNumberString,verseNumberString)] = headingString, [] # Blank refList
                    continue
                elif bits[0] == 'parallel': # These lines optionally follow pericope lines
                    assert( len(bits) == 2 )
                    heading, refList = headingDict[(BBB,chapterNumberString,verseNumberString)]
                    refList.append( bits[1] )
                    #print( "parallel2", repr(heading), refList )
                    headingDict[(BBB,chapterNumberString,verseNumberString)] = heading, refList
                    continue
                elif bits[0] == 'xref':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    noteString = re.sub( r'@<ta(.+?)@>', r'', noteString ) # Get rid of these encoded BCV references for now
                    noteString = re.sub( r'@<to(.+?)@>', r'', noteString ) # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace( '@/', '' )
                    #print( repr(encodedNoteString), repr(noteString) )
                    assert( '@' not in noteString )
                    xrefDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                    continue
                elif bits[0] == 'footnote':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    assert( '@' not in noteString )
                    footnoteDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                    continue
                else: print( "YETBible: Unknown line type", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits ); halt
            bookDict[lastBBB] = bookLines # Save the last book


                #if bookCode != lastBookCode: # We've started a new book
                    #if lastBookCode != -1: # Better save the last book
                        #self.saveBook( thisBook )
                    #BBB = Globals.BibleBooksCodes.getBBBFromYETBibleCode( bookCode )
                    #thisBook = BibleBook( self.name, BBB )
                    #thisBook.objectNameString = "YET Bible Book object"
                    #thisBook.objectTypeString = "YET"
                    #lastBookCode = bookCode
                    #lastChapterNumber = lastVerseNumber = -1

                #if chapterNumber != lastChapterNumber: # We've started a new chapter
                    #if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    #if chapterNumber == 0:
                        #logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #thisBook.appendLine( 'c', chapterNumberString )
                    #lastChapterNumber = chapterNumber
                    #lastVerseNumber = -1

                ## Handle the verse info
                #if verseNumber==lastVerseNumber and vText==lastVText:
                    #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #continue
                #if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    ## Move Psalm titles to verse zero
                    #verseNumber = 0
                #if verseNumber < lastVerseNumber:
                    #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #elif verseNumber == lastVerseNumber:
                    #if vText == lastVText:
                        #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #else:
                        #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                #lastVText = vText
                #lastVerseNumber = verseNumber

        # Now process the books
        for BBB,bkData in bookDict.items():
            #print( "Processing", BBB )
            thisBook = BibleBook( self.name, BBB )
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                if (BBB,chapterNumberString,verseNumberString) in headingDict:
                    heading, refList = headingDict[(BBB,chapterNumberString,verseNumberString)]
                    #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                    thisBook.appendLine( 's', heading )
                    if refList:
                        refString = ""
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                        for ref in refList:
                            refString += ('; ' if refString else '') + ref
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) )
                        thisBook.appendLine( 'r', '('+refString+')' )
                # Insert footnotes and cross-references
                while( '\\ff' in verseString ):
                    #print( "footnote", repr(verseString) )
                    fIx = verseString.index( '\\ff' )
                    caller = verseString[fIx+3]
                    #print( "fcaller", repr(caller) )
                    assert( caller.isdigit() )
                    note = footnoteDict[(BBB,chapterNumberString,verseNumberString,caller)]
                    #print( "fnote", repr(note) )
                    verseString = verseString[:fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[fIx+4:]
                    #print( "fvS", repr(verseString) )
                while( '\\xx' in verseString ):
                    #print( "xref", repr(verseString) )
                    fIx = verseString.index( '\\xx' )
                    caller = verseString[fIx+3]
                    #print( "xcaller", repr(caller) )
                    assert( caller.isdigit() )
                    note = xrefDict[(BBB,chapterNumberString,verseNumberString,caller)]
                    #print( "xnote", repr(note) )
                    verseString = verseString[:fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[fIx+4:]
                    #print( "xvS", repr(verseString) )
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumberString = chapterNumberString
                #print( BBB, chapterNumberString, verseNumberString, repr(verseString) )
                if verseString.startswith( '\\\\' ):  # It's an initial paragraph marker
                    if verseString[3]==' ': marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4]==' ': marker, verseString = verseString[2:4], verseString[5:]
                    else: halt
                    #print( '', '\\'+marker )
                    thisBook.appendLine( marker, '' )
                assert( not verseString.startswith( '\\\\' ) )
                bits = verseString.split( '\\\\' ) # Split on paragraph markers (but not character markers)
                for j,bit in enumerate(bits):
                    #print( "loop", j, repr(bit), repr(verseString) )
                    if j==0: thisBook.appendLine( 'v', verseNumberString + ' ' + verseString.rstrip() )
                    else:
                        if bit[1]==' ': marker, bit = bit[0], bit[2:]
                        elif bit[2]==' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        #print( "mV", marker, repr(bit), repr(verseString) )
                        thisBook.appendLine( marker, bit.rstrip() )
            self.saveBook( thisBook )
        self.doPostLoadProcessing()

예제 #33

파일 보기

파일: MySwordBible.py 프로젝트: DanTrevor/BibleOrgSys

    def load(self):
        """
        Load all the books out of the SQLite3 database.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("load()"))
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata[
                'MySword']['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.suppliedMetadata['MySword']['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("  MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    #else: print( "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    verseList = self.BibleOrganisationalSystem.getNumVersesList(
                        BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(
                        BBB)
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        del self.cursor
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

예제 #34

파일 보기

    def loadBook( self, BBB, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert len(bits) == 7
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert len(POSCode) == 2
            assert POSCode in Greek.POSCodes.keys()

            parsingCode = bits[2]
            assert len(parsingCode) == 8
            #print( parsingCode )
            for j,char in enumerate(parsingCode):
                assert char in Greek.parsingCodes[j]
            assert parsingCode[0] in Greek.personCodes
            assert parsingCode[1] in Greek.tenseCodes
            assert parsingCode[2] in Greek.voiceCodes
            assert parsingCode[3] in Greek.modeCodes
            assert parsingCode[4] in Greek.caseCodes
            assert parsingCode[5] in Greek.numberCodes
            assert parsingCode[6] in Greek.genderCodes
            assert parsingCode[7] in Greek.degreeCodes

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = 'Morph Greek NT Bible Book object'
        self.thisBook.objectTypeString = 'MorphGNT'
        filepath = os.path.join( self.sourceFilepath, filename )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Loading {}…".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.addLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.addLine( 'v', vn )
                        lastV = vn
                    self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            #if 0: #except:
                #logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                #if lineCount > 1: print( 'Previous line was: ', lastLine )
                #else: print( 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            if BibleOrgSysGlobals.verbosityLevel > 3: print( "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.stashBook( self.thisBook )

예제 #35

파일 보기

파일: MySwordBible.py 프로젝트: DanTrevor/BibleOrgSys

    def loadBook(self, BBB):
        """
        Load the requested book out of the SQLite3 database.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("loadBook( {} )").format(BBB))
        assert self.preloadDone

        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print("  {} is already loaded -- returning".format(BBB))
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading MySwordBible {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(
                _("MySwordBible: Loading {} from {}…").format(
                    BBB, self.sourceFilepath))

        #if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'BOTH', 'GEN'
        #booksExpected, textLineCountExpected = 1, 31102
        #elif self.suppliedMetadata['MySword']['OT']:
        #testament, BBB = 'OT', 'GEN'
        #booksExpected, textLineCountExpected = 1, 23145
        #elif self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'NT', 'MAT'
        #booksExpected, textLineCountExpected = 1, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        #bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C <= numC:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print("  MySword saving", BBB)
                        self.stashBook(thisBook)
                    #else: print( "Not saving", BBB )
                    break

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

예제 #36

파일 보기

파일: GreekNT.py 프로젝트: dimleyk/BibleOrgSys

    def loadBook( self, BBB, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert( len(bits) == 7 )
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert( len(POSCode) == 2 )
            assert( POSCode in Greek.POSCodes.keys() )

            parsingCode = bits[2]
            assert( len(parsingCode) == 8 )
            #print( parsingCode )
            for j,char in enumerate(parsingCode):
                assert( char in Greek.parsingCodes[j] )
            assert( parsingCode[0] in Greek.personCodes )
            assert( parsingCode[1] in Greek.tenseCodes )
            assert( parsingCode[2] in Greek.voiceCodes )
            assert( parsingCode[3] in Greek.modeCodes )
            assert( parsingCode[4] in Greek.caseCodes )
            assert( parsingCode[5] in Greek.numberCodes )
            assert( parsingCode[6] in Greek.genderCodes )
            assert( parsingCode[7] in Greek.degreeCodes )

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self.name, BBB )
        self.thisBook.objectNameString = "Morph Greek NT Bible Book object"
        self.thisBook.objectTypeString = "MorphGNT"
        filepath = os.path.join( self.sourceFilepath, filename )
        if Globals.verbosityLevel > 2: print( "  Loading {}...".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected UTF-16 Byte Order Marker in {}".format( filename ) )
                        line = line[1:] # Remove the UTF-8 Byte Order Marker
                    if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.appendLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.appendLine( 'v', vn )
                        lastV = vn
                    self.thisBook.appendLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.appendLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            if 0: #except:
                logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                if lineCount > 1: print( 'Previous line was: ', lastLine )
                else: print( 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            if Globals.verbosityLevel > 3: print( "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.saveBook( self.thisBook )

예제 #37

파일 보기

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BibleFilenameEndingsToAccept[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute('select * from Details')
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(
                self.settingsDict['Description']) < 40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, self.settingsDict['encryption']))

        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.settingsDict:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                            .format(BBB, C, V, repr(line)))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                            .format(BBB, C, V, repr(line), self.settingsDict))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("Saving", BBB, bookCount + 1)
                        self.saveBook(thisBook)
                    #else: print( "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList(BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(
                        BBB)
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
        cursor.close()
        self.doPostLoadProcessing()

예제 #38

파일 보기

파일: DrupalBible.py 프로젝트: gimapei/BibleOrgSys

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        status = 0  # 1 = getting chapters, 2 = getting verse data
        lastLine, lineCount = '', 0
        BBB = lastBBB = None
        bookDetails = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1:
                    if line[0] == chr(65279):  #U+FEFF
                        logging.info(
                            "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[
                            1:]  # Remove the UTF-16 Unicode Byte Order Marker (BOM)
                    elif line[:3] == 'ï»¿':  # 0xEF,0xBB,0xBF
                        logging.info(
                            "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[
                            3:]  # Remove the UTF-8 Unicode Byte Order Marker (BOM)
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines

                #print ( 'DB file line is "' + line + '"' )
                if line[0] == '#': continue  # Just discard comment lines
                lastLine = line
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning(
                            "Unknown DrupalBible first line: {}".format(
                                repr(line)))

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else:  # Get the version name details
                        bits = line.split('|')
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else:  # Get the book name details
                        bits = line.split('|')
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert bookShortName == bookCode
                        BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                            bookCode)
                        BBB = BBBresult if isinstance(
                            BBBresult, str
                        ) else BBBresult[
                            0]  # Result can be string or list of strings (best guess first)
                        bookDetails[
                            BBB] = bookFullName, bookShortName, numChapters

                elif status == 2:  # Get the verse text
                    bits = line.split('|')
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    #chapterNumber, verseNumber = int( chapterNumberString ), int( verseNumberString )
                    if lineMark:
                        print(repr(lineMark))
                        halt
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                        bookCode)
                    BBB = BBBresult if isinstance(
                        BBBresult, str
                    ) else BBBresult[
                        0]  # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB:
                        if lastBBB is not None:
                            self.stashBook(thisBook)
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'DrupalBible Bible Book object'
                        thisBook.objectTypeString = 'DrupalBible'
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace('<', '\\it ').replace(
                        '>', '\\it*')
                    thisBook.addLine('v', verseNumberString + ' ' + verseText)

                else:
                    halt

        # Save the final book
        self.stashBook(thisBook)
        self.doPostLoadProcessing()

예제 #39

파일 보기

파일: USFXXMLBible.py 프로젝트: alerque/BibleOrgSys

    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        BibleOrgSysGlobals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, BBB, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'vsg3' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'jsa0' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'dj35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'hs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.addLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ss13' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.addLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, BBB, C )
            elif element.tag == 'b':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'nd04' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.addLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'od01' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'gd92' )
                idField = None
                for attrib,value in element.items():
                    if attrib == 'id': idField = value
                    else:
                        logging.warning( _("dv35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if idField and text is None:
                    text = idField
                else:
                    logging.warning( _("dve4 Unprocessed idField ({}) in {}").format( idField, location ) )
                if text is None:
                    logging.critical( "Why is {} empty at {}".format( marker, location ) )
                assert( text is not None )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 've': # What's this in Psalms: <c id="4" /><ve /><d>For the Chief Musician; on stringed instruments. A Psalm of David.</d>
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'kds3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ks29' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'kj24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js91' )
                #self.thisBook.addLine( 'b', '' )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring 've' field", BBB, C, V )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.saveBook( self.thisBook )

예제 #40

파일 보기

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lastLine, lineCount = '', 0
        BBB = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      CSVBible.load: Detected UTF-16 Byte Order Marker" )
                #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if line == ' ':
                    continue  # Handle special case which has blanks on every second line -- HACK
                lastLine = line
                #print ( "CSV file line {} is {}".format( lineCount, repr(line) ) )
                if line[0] == '#': continue  # Just discard comment lines
                if lineCount == 1:
                    if line.startswith('"Book",'):
                        quoted = True
                        continue  # Just discard header line
                    elif line.startswith('Book,'):
                        quoted = False
                        continue  # Just discard header line

                bits = line.split(',', 3)
                #print( lineCount, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bString, chapterNumberString, verseNumberString, vText = bits
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bString, chapterNumberString, verseNumberString,
                          vText, len(bits), bits)

                # Remove quote marks from these strings
                if quoted:
                    if len(bString) >= 2 and bString[0] == bString[
                            -1] and bString[0] in '"\'':
                        bString = bString[1:-1]
                    if len(chapterNumberString) >= 2 and chapterNumberString[
                            0] == chapterNumberString[
                                -1] and chapterNumberString[0] in '"\'':
                        chapterNumberString = chapterNumberString[1:-1]
                    if len(verseNumberString) >= 2 and verseNumberString[
                            0] == verseNumberString[-1] and verseNumberString[
                                0] in '"\'':
                        verseNumberString = verseNumberString[1:-1]
                    if len(vText) >= 2 and vText[0] == vText[-1] and vText[
                            0] in '"\'':
                        vText = vText[1:-1]
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )

                #if not bookCode and not chapterNumberString and not verseNumberString:
                #print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #continue
                #if BibleOrgSysGlobals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                #if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
                #if BibleOrgSysGlobals.debugFlag: assert( verseNumberString.isdigit() )
                bookNumber = int(bString)
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookNumber != lastBookNumber:  # We've started a new book
                    if lastBookNumber != -1:  # Better save the last book
                        self.saveBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumber)  # Try to guess
                    assert (BBB)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookNumber,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                vTextOriginal = vText
                # First do special characters
                vText = vText.replace('\\ldblquote', '“').replace(
                    '\\rdblquote', '”').replace('\\lquote',
                                                '‘').replace('\\rquote', '’')
                vText = vText.replace('\\emdash', '—').replace('\\endash', '–')
                # Now do Unicode characters
                while True:  # Find patterns like \\'d3
                    match = re.search(r"\\'[0-9a-f][0-9a-f]", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end()],
                            16)  # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]
                while True:  # Find patterns like \\u253?
                    match = re.search(r"\\u[1-2][0-9][0-9]\?", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end() -
                                  1])  # Convert three digits to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]
                #if vText != vTextOriginal: print( repr(vTextOriginal) ); print( repr(vText) )

                ## Handle special formatting
                ##   [brackets] are for Italicized words
                ##   <brackets> are for the Words of Christ in Red
                ##   «brackets»  are for the Titles in the Book  of Psalms.
                #vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                #.replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                #if vText and vText[0]=='«':
                #assert( BBB=='PSA' and verseNumberString=='1' )
                #vBits = vText[1:].split( '»' )
                ##print( "vBits", vBits )
                #thisBook.addLine( 'd', vBits[0] ) # Psalm title
                #vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook(thisBook)
        self.doPostLoadProcessing()

예제 #41

파일 보기

    def load( self ):
        """
        Load all the books out of the SQLite3 database.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("load()") )
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )


        if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.suppliedMetadata['MySword']['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        # Create the first book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError: # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n': line = line[:-1]
                    if '\r' in line or '\n' in line: # (in the middle)
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  MySword saving", BBB, bookCount+1 )
                        self.stashBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = self.BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    verseList = self.BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        self.applySuppliedMetadata( 'MySword' ) # Copy some to self.settingsDict
        self.doPostLoadProcessing()

예제 #42

파일 보기

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        def decodeVerse(encodedVerseString):
            """
            Decodes the verse which has @ format codes.
            """
            verseString = encodedVerseString
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace('@^', '\\\\p ')
            verseString = verseString.replace('@0', '\\\\m ')
            verseString = verseString.replace('@1', '\\\\q1 ').replace(
                '@2', '\\\\q2 ').replace('@3',
                                         '\\\\q3 ').replace('@4', '\\q4 ')
            verseString = verseString.replace('@8', '\\\\m ')
            # Character markers (marked now with single backslash)
            verseString = verseString.replace('@6',
                                              '\\wj ').replace('@5', '\\wj*')
            verseString = verseString.replace('@9', '\\add ').replace(
                '@7', '\\add*')  # or \\i ???
            verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
            verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
            #print( repr( verseString ) )
            assert ('@' not in verseString)
            return verseString

        # end of decodeVerse

        # Read all the lines into bookDict
        lastLine, lineCount = '', 0
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(
        ), OrderedDict(), {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = ''
        lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      YETBible.load: Detected UTF-16 Byte Order Marker" )
                #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #print ( 'YETBible file line is "' + line + '"' )

                bits = line.split('\t')
                #print( self.givenName, BBB, bits )
                if bits[0] == 'info':
                    assert (len(bits) == 3)
                    if bits[1] == 'shortName':
                        shortName = bits[2]
                        self.name = shortName
                    elif bits[1] == 'longName':
                        longName = bits[2]
                    elif bits[1] == 'description':
                        description = bits[2]
                    elif bits[1] == 'locale':
                        locale = bits[2]
                        assert (2 <= len(locale) <= 3)
                        if locale == 'in':
                            locale = 'id'  # Fix a quirk in the locale encoding
                    else:
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                elif bits[0] == 'book_name':
                    assert (3 <= len(bits) <= 4)
                    thisBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bits[1])
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                    continue
                elif bits[0] == 'verse':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    #print( "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) )
                    if BBB != lastBBB:  # We have a new book
                        if lastBBB is not None:  # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert (BBB in bookNameDict)
                        bookLines = OrderedDict()  # Keys are (C,V) strings
                    verseString = decodeVerse(encodedVerseString)
                    bookLines[(chapterNumberString, verseNumberString
                               )] = verseString  # Just store it for now
                    lastBBB = BBB
                    continue
                elif bits[0] == 'pericope':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    headingString = encodedHeadingString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    #print( repr(encodedHeadingString), repr(headingString) )
                    assert ('@' not in headingString)
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = headingString, [
                                 ]  # Blank refList
                    continue
                elif bits[
                        0] == 'parallel':  # These lines optionally follow pericope lines
                    assert (len(bits) == 2)
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    refList.append(bits[1])
                    #print( "parallel2", repr(heading), refList )
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = heading, refList
                    continue
                elif bits[0] == 'xref':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    noteString = re.sub(
                        r'@<ta(.+?)@>', r'', noteString
                    )  # Get rid of these encoded BCV references for now
                    noteString = re.sub(
                        r'@<to(.+?)@>', r'', noteString
                    )  # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace('@/', '')
                    #print( repr(encodedNoteString), repr(noteString) )
                    assert ('@' not in noteString)
                    xrefDict[(BBB, chapterNumberString, verseNumberString,
                              indexNumberString)] = noteString
                    continue
                elif bits[0] == 'footnote':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    assert ('@' not in noteString)
                    footnoteDict[(BBB, chapterNumberString, verseNumberString,
                                  indexNumberString)] = noteString
                    continue
                else:
                    print("YETBible: Unknown line type", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    halt
            bookDict[lastBBB] = bookLines  # Save the last book

            #if bookCode != lastBookCode: # We've started a new book
            #if lastBookCode != -1: # Better save the last book
            #self.saveBook( thisBook )
            #BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromYETBibleCode( bookCode )
            #thisBook = BibleBook( self, BBB )
            #thisBook.objectNameString = "YET Bible Book object"
            #thisBook.objectTypeString = "YET"
            #lastBookCode = bookCode
            #lastChapterNumber = lastVerseNumber = -1

            #if chapterNumber != lastChapterNumber: # We've started a new chapter
            #if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
            #if chapterNumber == 0:
            #logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #thisBook.addLine( 'c', chapterNumberString )
            #lastChapterNumber = chapterNumber
            #lastVerseNumber = -1

            ## Handle the verse info
            #if verseNumber==lastVerseNumber and vText==lastVText:
            #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #continue
            #if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
            ## Move Psalm titles to verse zero
            #verseNumber = 0
            #if verseNumber < lastVerseNumber:
            #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #elif verseNumber == lastVerseNumber:
            #if vText == lastVText:
            #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #else:
            #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #thisBook.addLine( 'v', verseNumberString + ' ' + vText )
            #lastVText = vText
            #lastVerseNumber = verseNumber

        # Now process the books
        for BBB, bkData in bookDict.items():
            #print( "Processing", BBB )
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,
                 verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                if (BBB, chapterNumberString,
                        verseNumberString) in headingDict:
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                    thisBook.addLine('s', heading)
                    if refList:
                        refString = ""
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                        for ref in refList:
                            refString += ('; ' if refString else '') + ref
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) )
                        thisBook.addLine('r', '(' + refString + ')')
                # Insert footnotes and cross-references
                while ('\\ff' in verseString):
                    #print( "footnote", repr(verseString) )
                    fIx = verseString.index('\\ff')
                    caller = verseString[fIx + 3]
                    #print( "fcaller", repr(caller) )
                    assert (caller.isdigit())
                    note = footnoteDict[(BBB, chapterNumberString,
                                         verseNumberString, caller)]
                    #print( "fnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[
                                                  fIx + 4:]
                    #print( "fvS", repr(verseString) )
                while ('\\xx' in verseString):
                    #print( "xref", repr(verseString) )
                    fIx = verseString.index('\\xx')
                    caller = verseString[fIx + 3]
                    #print( "xcaller", repr(caller) )
                    assert (caller.isdigit())
                    note = xrefDict[(BBB, chapterNumberString,
                                     verseNumberString, caller)]
                    #print( "xnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[
                                                  fIx + 4:]
                    #print( "xvS", repr(verseString) )
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                #print( BBB, chapterNumberString, verseNumberString, repr(verseString) )
                if verseString.startswith(
                        '\\\\'):  # It's an initial paragraph marker
                    if verseString[3] == ' ':
                        marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4] == ' ':
                        marker, verseString = verseString[2:4], verseString[5:]
                    else:
                        halt
                    #print( '', '\\'+marker )
                    thisBook.addLine(marker, '')
                assert (not verseString.startswith('\\\\'))
                bits = verseString.split(
                    '\\\\'
                )  # Split on paragraph markers (but not character markers)
                for j, bit in enumerate(bits):
                    #print( "loop", j, repr(bit), repr(verseString) )
                    if j == 0:
                        thisBook.addLine(
                            'v',
                            verseNumberString + ' ' + verseString.rstrip())
                    else:
                        if bit[1] == ' ': marker, bit = bit[0], bit[2:]
                        elif bit[2] == ' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        #print( "mV", marker, repr(bit), repr(verseString) )
                        thisBook.addLine(marker, bit.rstrip())
            self.saveBook(thisBook)
        self.doPostLoadProcessing()

예제 #43

파일 보기

    def loadBook( self, BBB ):
        """
        Load the requested book out of the SQLite3 database.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("loadBook( {} )").format( BBB ) )
        assert self.preloadDone

        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag: print( "  {} is already loaded -- returning".format( BBB ) )
            return # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading MySwordBible {} for {}".format( BBB, self.name ) )
            return # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag: print( _("MySwordBible: Loading {} from {}…").format( BBB, self.sourceFilepath ) )

        #if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
            #testament, BBB = 'BOTH', 'GEN'
            #booksExpected, textLineCountExpected = 1, 31102
        #elif self.suppliedMetadata['MySword']['OT']:
            #testament, BBB = 'OT', 'GEN'
            #booksExpected, textLineCountExpected = 1, 23145
        #elif self.suppliedMetadata['MySword']['NT']:
            #testament, BBB = 'NT', 'MAT'
            #booksExpected, textLineCountExpected = 1, 7957


        # Create the first book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        #bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError: # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n': line = line[:-1]
                    if '\r' in line or '\n' in line: # (in the middle)
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C <= numC: # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1
                else: # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  MySword saving", BBB )
                        self.stashBook( thisBook )
                    #else: print( "Not saving", BBB )
                    break

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

예제 #44

파일 보기

파일: UnboundBible.py 프로젝트: openscriptures/BibleOrgSys

    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata["Unbound"] = {}

        lastLine, lineCount = "", 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ""
        with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                # if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                # logging.info( "      UnboundBible.load: Detected Unicode Byte Order Marker (BOM)" )
                # line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                if line[-1] == "\n":
                    line = line[:-1]  # Removing trailing newline character
                if not line:
                    continue  # Just discard blank lines
                lastLine = line
                # print ( 'UB file line is "' + line + '"' )
                if line[0] == "#":
                    hashBits = line[1:].split("\t")
                    if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                        self.suppliedMetadata["Unbound"][hashBits[0]] = hashBits[1]
                        # if hashBits[0] == 'name': self.name = hashBits[1]
                        # elif hashBits[0] == 'filetype': self.filetype = hashBits[1]
                        # elif hashBits[0] == 'copyright': self.copyright = hashBits[1]
                        # elif hashBits[0] == 'abbreviation': self.abbreviation = hashBits[1]
                        # elif hashBits[0] == 'language': self.language = hashBits[1]
                        # elif hashBits[0] == 'note': self.note = hashBits[1]
                        # elif hashBits[0] == 'columns': self.columns = hashBits[1]
                        # logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( hashBits[0], hashBits[1] ) )
                    continue  # Just discard comment lines

                bits = line.split("\t")
                # print( self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = (
                        bits
                    )
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = (
                        bits
                    )
                elif len(bits) == 1 and self.givenName.startswith("lxx_a_parsing_"):
                    logging.warning(
                        _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                else:
                    print(
                        "Unexpected number of bits",
                        self.givenName,
                        BBB,
                        bookCode,
                        chapterNumberString,
                        verseNumberString,
                        len(bits),
                        bits,
                    )
                    halt

                if NRSVA_bookCode:
                    assert len(NRSVA_bookCode) == 3
                if NRSVA_chapterNumberString:
                    assert NRSVA_chapterNumberString.isdigit()
                if NRSVA_verseNumberString:
                    assert NRSVA_verseNumberString.isdigit()

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print(
                        "Skipping empty line in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    assert len(bookCode) == 3
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumberString.isdigit()
                if BibleOrgSysGlobals.debugFlag:
                    assert verseNumberString.isdigit()

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString {!r} in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText:
                    continue  # Just ignore blank verses I think
                if vText == "+":
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumberString.isdigit()
                    sequenceNumber = int(sequenceNumberString)
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumber > lastSequence or self.givenName in (
                            "gothic_latin",
                            "hebrew_bhs_consonants",
                            "hebrew_bhs_vowels",
                            "latvian_nt",
                            "ukrainian_1871",
                        )  # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.stashBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "Unbound Bible Book object"
                    thisBook.objectTypeString = "Unbound"
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumber > lastChapterNumber or BBB == "ESG"  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                    thisBook.addLine("c", chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                if (
                    BBB == "PSA"
                    and verseNumberString == "1"
                    and vText.startswith("&lt;")
                    and self.givenName == "basic_english"
                ):
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                            lastVerseNumber,
                            verseNumber,
                            self.givenName,
                            BBB,
                            bookCode,
                            chapterNumberString,
                            verseNumberString,
                        )
                    )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                thisBook.addLine("v", verseNumberString + " " + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.stashBook(thisBook)
        self.applySuppliedMetadata("Unbound")  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

예제 #45

파일 보기

파일: MySwordBible.py 프로젝트: alerque/BibleOrgSys

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except TypeError: # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n': line = line[:-1]
                    if '\r' in line or '\n' in line: # (in the middle)
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False
        cursor.close()
        self.doPostLoadProcessing()

예제 #46

파일 보기

파일: USFXXMLBible.py 프로젝트: alerque/BibleOrgSys

class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder {!r} is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                ignore = False
                for ending in filenameEndingsToIgnore:
                    if somethingUpper.endswith( ending): ignore=True; break
                if ignore: continue
                if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what {!r} is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in {!r}: {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if BibleOrgSysGlobals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in {!r}".format( self.sourceFolder ) )
            return # No use continuing

        #print( self.sourceFolder, foundFolders, len(foundFiles), foundFiles )
        numFound = 0
        for thisFilename in sorted( foundFiles ):
            firstLines = BibleOrgSysGlobals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXB (unexpected) first line was {!r} in {}".format( firstLines, thisFilename ) )
                continue
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2: print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

                                #if BibleOrgSysGlobals.verbosityLevel > 2: print( _("  It seems we have {}...").format( BBB ) )
                        #self.thisBook = BibleBook( self, BBB )
                        #self.thisBook.objectNameString = "OSIS XML Bible Book object"
                        #self.thisBook.objectTypeString = "OSIS"
                        #self.haveBook = True

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: {!r}.".format( self.sourceFilepath, errorString ) )
            return
        if BibleOrgSysGlobals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (osis) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                #print( "attrib", repr(attrib), repr(value) )
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None
            for element in self.tree:
                #print( "element", repr(element.tag) )
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'cff3' )
                    BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                    ##BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, '54f2' )
                    #BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'hd35' )
                    ## Process the attributes
                    #idField = bookStyle = None
                    #for attrib,value in element.items():
                        #if attrib=='id' or attrib=='code':
                            #idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            ##if idField != BBB:
                            ##    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        #elif attrib=='style':
                            #bookStyle = value
                        #else:
                            #logging.warning( _("gfw2 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
                    #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in {!r}".format( self.sourceFolder ) )
            for thisFilename in foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Have possible USFX ID {!r}".format( USXId ) )
                            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if BibleOrgSysGlobals.verbosityLevel > 2: print( "BBB is {!r}".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        BibleOrgSysGlobals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        BibleOrgSysGlobals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, BBB, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'vsg3' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'jsa0' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'dj35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'hs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.addLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ss13' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.addLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.addLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, BBB, C )
            elif element.tag == 'b':
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'ks35' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'gs35' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'nd04' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.addLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'od01' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'gd92' )
                idField = None
                for attrib,value in element.items():
                    if attrib == 'id': idField = value
                    else:
                        logging.warning( _("dv35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if idField and text is None:
                    text = idField
                else:
                    logging.warning( _("dve4 Unprocessed idField ({}) in {}").format( idField, location ) )
                if text is None:
                    logging.critical( "Why is {} empty at {}".format( marker, location ) )
                assert( text is not None )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 've': # What's this in Psalms: <c id="4" /><ve /><d>For the Chief Musician; on stringed instruments. A Psalm of David.</d>
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'kds3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ks29' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'kj24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'js91' )
                #self.thisBook.addLine( 'b', '' )
                if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring 've' field", BBB, C, V )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, BBB, C ):
        """
        Load the paragraph (p or q) container from the XML data file.
        """
        #if BibleOrgSysGlobals.verbosityLevel > 3:
            #print( _("USFXXMLBible.loadParagraph: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        V = None
        pTag, pText = paragraphElement.tag, clean(paragraphElement.text)
        BibleOrgSysGlobals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm': sfm = value
            elif attrib == 'level': level = value
            elif attrib == 'style': style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        if sfm:
            assert( pTag == 'p' )
            pTag = sfm
        if level:
            #assert( pTag == 'q' ) # Could also be mt, etc.
            pTag += level
        if style:
            #print( repr(pTag), repr(pText), repr(style) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "Ignoring {!r} style".format( style ) )

        self.thisBook.addLine( pTag, '' if pText is None else pText )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            #print( "element", repr(element.tag) )
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'crc2' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'lct3' )
                lastV, V = V, None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.addLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'lsc3' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'mfy4' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'bd24' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", paragraphLocation, C, V, repr(element.text) )
                self.loadFootnote( element, location, BBB, C, V )
            elif element.tag == 'x':
                #print( "USFX.loadParagraph Found xref at", paragraphLocation, C, V, repr(element.text) )
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location, BBB, C, V )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'kdf0' )
                BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'lkj1' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.addLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                #if target not in ('w','ior',): print( "ref sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
                #print( "Saved", '\\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if BibleOrgSysGlobals.debugFlag: halt
            elif element.tag == 'milestone': # e.g., <milestone sfm="pb" attribute=""/> (pb = explicit page break)
                BibleOrgSysGlobals.checkXMLNoText( element, location, 'jzx2' )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ms23' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'dw24' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("mcd2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('pb',): print( "milestone sfm got", repr(sfm) )
                self.thisBook.addLine( sfm, '' )
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location, BBB, C, V ):
        """
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'sd12' )
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            #print( "element", repr(element.tag) )
            if subelement.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", sublocation, C, V, repr(subelement.text) )
                self.loadFootnote( subelement, sublocation, BBB, C, V )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        """
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'ff36' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'cf35' )
        figDict = { 'description':'', 'catalog':'', 'size':'', 'location':'', 'copyright':'', 'caption':'', 'reference':'' }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = ''
        for j,tag in enumerate( ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',) ):
            newString += ('' if j==0 else '|') + figDict[tag]
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        """
        BibleOrgSysGlobals.checkXMLNoText( element, location, 'kg92' )
        BibleOrgSysGlobals.checkXMLNoTail( element, location, 'ka92' )
        BibleOrgSysGlobals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                #print( "table", sublocation )
                self.thisBook.addLine( 'tr', '' )
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'sg32' )
                BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    BibleOrgSysGlobals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    marker = tag + (level if level else '')
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
            else:
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, self.thisBook.BBB, C, V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location, BBB, C, V ):
        """
        Handles footnote fields, including xt field.
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadFootnote", repr(caller), repr(text), repr(tail), repr(marker), repr(fText), repr(fTail) )
            #if BibleOrgSysGlobals.verbosityLevel > 0 and marker not in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',):
                #print( "USFX.loadFootnote found", repr(caller), repr(marker), repr(fText), repr(fTail) )
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq','xt',) )
            if marker=='ref':
                assert( fText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText ) )
                if marker=='xt' or marker[0]=='f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2 == 'ref':
                            #print( sub2location )
                            if fText2:
                                #print( 'ft2', marker2, repr(fText2), repr(fTail2), sub2location )
                                self.thisBook.appendToLastLine( fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value # OSIS style reference, e.g., '1SA.27.8'
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                #print( 'tg', marker2, repr(target) )
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else:
                                if debuggingThisModule: halt
                        elif marker2 in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                            self.loadCharacterFormatting( sub2element, sub2location, BBB, C, V )
                        else:
                            print( 'Ignored marker2', repr(marker2), BBB, C, V )
                            if debuggingThisModule: halt
                        if fTail2: self.thisBook.appendToLastLine( fTail2 )
                elif marker in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                    self.loadCharacterFormatting( subelement, sublocation, BBB, C, V )
                else:
                    print( 'Ignored marker', repr(marker), BBB, C, V )
                    halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadCrossreference", repr(caller), repr(text), repr(tail), repr(marker), repr(xText), repr(xTail) )
            #if BibleOrgSysGlobals.verbosityLevel > 0 and marker not in ('ref','xo','xt',):
                #print( "USFX.loadCrossreference found", repr(caller), repr(marker), repr(xText), repr(xTail) )
            if BibleOrgSysGlobals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            if xText2:
                                #print( 'xt2', marker2, repr(xText2), repr(xTail2), sub2location )
                                self.thisBook.appendToLastLine( xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target: self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                        if xTail2: self.thisBook.appendToLastLine( xTail2 )
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )

예제 #47

파일 보기

    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search( '^; TITLE:\\s', line )
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type {!r} match from {!r}".format( match.group(0), line ) )
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}".format( firstLine, thisFilename ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue

                #print ( 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith( '; TITLE:' ):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith( '; ABBREVIATION:' ):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith( '; HAS ITALICS' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES:' ):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS REDLETTER' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0]==';':
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}".format( line ) )
                    continue # Just discard comment lines

                # Process the main segment
                if line.startswith( '$$ ' ):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #print( "pointer", repr(pointer) )
                    if pointer and pointer[0]=='{' and pointer[-1]=='}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #print( "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else: # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split( ' ', 1 )
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split( ':' )
                            chapterNumber = int( chapterNumberString )
                            verseNumber = int( verseNumberString )
                            if bookCode != lastBookCode: # We've started a new book
                                if bookCode in ('Ge',): BBB = 'GEN'
                                elif bookCode in ('Le',): BBB = 'LEV'
                                elif bookCode in ('La',): BBB = 'LAM'
                                ##elif bookCode in ('Es',): BBB = 'EST'
                                ##elif bookCode in ('Pr',): BBB = 'PRO'
                                #elif bookCode in ('So',): BBB = 'SNG'
                                #elif bookCode in ('La',): BBB = 'LAM'
                                #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #print( "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess
                                    #print( "4BBB =", repr(BBB) )
                        else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                    continue # Just save the pointer information which refers to the text on the next line
                else: # it's not a $$ line
                    text = line
                    #print( "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                        vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                        #if '\\' in vText: print( 'ForgeForSwordSearcher vText', repr(vText) )
                        #print( BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search( '\\{(.+?)\\}', vText )
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                            vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\{(.+?)\\}', vText )
                        # Convert [stuff] to added fields
                        match = re.search( '\\[(.+?)\\]', vText )
                        while match:
                            addText = '\\add {}\\add*'.format( match.group(1) )
                            vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\[(.+?)\\]', vText )
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search( '\\+r/(.+?)-r/', vText )
                        while match:
                            addText = '\\wj {}\\wj*'.format( match.group(1) )
                            vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\+r/(.+?)-r/', vText )
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning( "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                                break


                if bookCode:
                    if bookCode != lastBookCode: # We've started a new book
                        if lastBookCode != -1: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "ForgeForSwordSearcherBible could not figure out {!r} book code".format( bookCode ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCode yet
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}".format( line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            #print( "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata( 'Forge4SS' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()