Esempio n. 1
0
    def __validateAndExtractBook(self, book):
        """
        Validate one OpenSong XML book element and extract its chapters.

        The book name is taken from the element's "n" attribute (any other
            attribute is logged as unprocessed) and looked up with
            self.genericBOS.getBBB().  When a book code is found, a new BibleBook
            is given 'id', 'h' and 'mt1' lines, each chapter subelement is handed
            to __validateAndExtractChapter(), and the book goes to self.saveBook().

        A missing or unrecognised book name is logged as an error and
            nothing is saved.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Only the "n" (name) attribute is understood on a book element
        bookName = None
        for attributeName, attributeValue in book.items():
            if attributeName != "n":
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attributeName, attributeValue))
                continue
            bookName = attributeValue

        if not bookName:  # nothing to identify the book with
            logging.error(_("OpenSong load can't find a book name"))
            return

        BBB = self.genericBOS.getBBB(bookName)
        if not BBB:  # the name didn't give us a book code
            logging.error(
                _("OpenSong load doesn't recognize book name: {!r}").format(
                    bookName))
            return

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))

        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"

        # Start the book with its identification and heading lines
        abbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(
            BBB)
        idText = '{} imported by {}'.format(abbreviation.upper(),
                                            ProgNameVersion)
        thisBook.addLine('id', idText)
        thisBook.addLine('h', bookName)
        thisBook.addLine('mt1', bookName)

        # Every subelement of a book is expected to be a chapter
        chapterLocation = "chapter in {}".format(BBB)
        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error(
                    "Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.chapterTag, child.tag))
                continue
            BibleOrgSysGlobals.checkXMLNoText(child, chapterLocation, 'j3jd')
            BibleOrgSysGlobals.checkXMLNoTail(child, chapterLocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, child)

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)
Esempio n. 2
0
    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        Parameters:
            book: the XML book element.  Its "n" attribute supplies the book
                name (other attributes are logged as unprocessed) and its
                subelements are expected to be chapters.
            bookNumber: passed to BibleBooksCodes.getBBBFromReferenceNumber();
                the code found this way replaces any different code that was
                deduced from the book name.

        If a book code is determined, the chapters are extracted into a new
            BibleBook which is then passed to self.stashBook().
        """

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print(_("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        # Only try the name-based lookups if we actually have a name
        #   (otherwise we'd be asking removeAccents() to process None)
        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
            if BBB is None:  # Try again without any accents
                adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
                if adjustedBookName != bookName:
                    BBB = self.genericBOS.getBBBFromText(adjustedBookName)

        BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
            bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                print("Assuming that book {} {!r} is {} (not {})".format(
                    bookNumber, bookName, BBB2, BBB))
            BBB = BBB2  # The book number wins over the book name

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print(_("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "vb26 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.chapterTag, element.tag))
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print("  Saving {} into results…".format(BBB))
            self.stashBook(thisBook)
Esempio n. 3
0
    def __validateAndExtractBook(self, book):
        """
        Validate one Haggai XML book element and extract its captions
            and chapters.

        The "bnumber", "bname" and "bsname" attributes are collected (any other
            attribute is logged as unprocessed).  The book code is looked up
            from the book number if there is one, otherwise from the book name.
            When a book code is found, a new BibleBook is filled from the
            caption and chapter subelements and passed to self.saveBook().
        """
        if Globals.verbosityLevel > 3:
            print(_("Validating XML book..."))

        # Gather the attributes that we know about
        found = {"bnumber": None, "bname": None, "bsname": None}
        for attributeName, attributeValue in book.items():
            if attributeName in found:
                found[attributeName] = attributeValue
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attributeName, attributeValue))
        bookNumber = found["bnumber"]
        bookName = found["bname"]
        bookShortName = found["bsname"]

        # The book number (if given) takes priority over the book name
        BBB = None
        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning(
                    "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                    .format(bookNumber, bookName, bookShortName))
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)

        if not BBB:  # couldn't work out which book this is
            return

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))

        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "Haggai XML Bible Book object"
        thisBook.objectTypeString = "Haggai"

        for child in book:
            childTag = child.tag
            if childTag == HaggaiXMLBible.captionTag:
                # A caption is just text which becomes a main title line
                where = "caption in {}".format(BBB)
                Globals.checkXMLNoAttributes(child, where, 'jhl6')
                Globals.checkXMLNoSubelements(child, where, 'jk21')
                Globals.checkXMLNoTail(child, where, 'kjh6')
                thisBook.appendLine('mt', child.text)
            elif childTag == HaggaiXMLBible.chapterTag:
                where = "chapter in {}".format(BBB)
                Globals.checkXMLNoText(child, where, 'j3jd')
                Globals.checkXMLNoTail(child, where, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, child)
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    HaggaiXMLBible.chapterTag, childTag))

        if Globals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)
Esempio n. 4
0
    def __validateAndExtractBook( self, book ):
        """
        Validate one Haggai XML book element and extract its captions
            and chapters.

        The "bnumber", "bname" and "bsname" attributes are collected (any other
            attribute is logged as unprocessed).  The book code is looked up
            from the book number if there is one, otherwise from the book name.
            When a book code is found, a new BibleBook is filled from the
            caption and chapter subelements and passed to self.stashBook().
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("Validating XML book…") )

        # Gather the attributes that we know about
        bookNumber = bookName = bookShortName = None
        for attributeName,attributeValue in book.items():
            if attributeName == "bnumber": bookNumber = attributeValue
            elif attributeName == "bname": bookName = attributeValue
            elif attributeName == "bsname": bookShortName = attributeValue
            else:
                logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attributeName, attributeValue ) )

        # The book number (if given) takes priority over the book name
        BBB = None
        if bookNumber:
            try:
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                message = "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                logging.warning( message.format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )

        if not BBB: # couldn't work out which book this is
            return

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("Validating {} {}…").format( BBB, bookName ) )

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'Haggai XML Bible Book object'
        thisBook.objectTypeString = 'Haggai'

        for child in book:
            childTag = child.tag
            if childTag == HaggaiXMLBible.captionTag:
                # A caption is just text which becomes a main title line
                where = "caption in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoAttributes( child, where, 'jhl6' )
                BibleOrgSysGlobals.checkXMLNoSubelements( child, where, 'jk21' )
                BibleOrgSysGlobals.checkXMLNoTail( child, where, 'kjh6' )
                thisBook.addLine( 'mt', child.text )
            elif childTag == HaggaiXMLBible.chapterTag:
                where = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( child, where, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( child, where, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, child )
            else:
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, childTag ) )

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
Esempio n. 5
0
    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book name is taken from the "n" attribute of the book element
            (other attributes are logged as unprocessed) and looked up with
            self.genericBOS.getBBB().  If a book code is found, the chapter
            subelements are extracted into a new BibleBook which is passed
            to self.saveBook().

        An error is logged (and nothing is saved) only if there's no book name
            or if the book name isn't recognised.
        """

        if Globals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBB(bookName)
            if BBB:
                if Globals.verbosityLevel > 2:
                    print(_("Validating {} {}...").format(BBB, bookName))
                thisBook = BibleBook(self.name, BBB)
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        Globals.checkXMLNoText(element, sublocation, 'j3jd')
                        Globals.checkXMLNoTail(element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find '{}' but got '{}'".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                if Globals.verbosityLevel > 2:
                    print("  Saving {} into results...".format(BBB))
                self.saveBook(thisBook)
            else:  # no BBB -- must only be reported when the lookup failed
                logging.error(
                    _("OpenSong load doesn't recognize book name: '{}'").format(
                        bookName))
        else:  # no bookName -- must only be reported when the name is missing
            logging.error(_("OpenSong load can't find a book name"))
Esempio n. 6
0
    def __validateAndExtractBook( self, book, bookNumber ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        Parameters:
            book: the XML book element.  Its "n" attribute supplies the book
                name (other attributes are logged as unprocessed) and its
                subelements are expected to be chapters.
            bookNumber: passed to BibleBooksCodes.getBBBFromReferenceNumber();
                the code found this way replaces any different code that was
                deduced from the book name.

        If a book code is determined, the chapters are extracted into a new
            BibleBook which is then passed to self.stashBook().
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Process the div attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        # Only try the name-based lookups if we actually have a name
        #   (otherwise we'd be asking removeAccents() to process None)
        if bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )
            if BBB is None: # Try again without any accents
                adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
                if adjustedBookName != bookName:
                    BBB = self.genericBOS.getBBBFromText( adjustedBookName )

        BBB2 = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
        if BBB2 != BBB: # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
            BBB = BBB2 # The book number wins over the book name

        if BBB:
            if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
            self.stashBook( thisBook )
Esempio n. 7
0
    def __validateAndExtractBook( self, book ):
        """
        Validate one OpenSong XML book element and extract its chapters.

        The book name is taken from the element's "n" attribute (any other
            attribute is logged as unprocessed).  It is looked up first with
            self.genericBOS.getBBBFromText() and, if that gives nothing, with
            the module-level BibleBooksNames (which is loaded on first use).

        When a book code is found, a new BibleBook is given 'id', 'h' and 'mt1'
            lines, each chapter subelement is handed to
            __validateAndExtractChapter(), and the book goes to self.stashBook().
            A missing or unrecognised book name is logged as an error.
        """
        global BibleBooksNames

        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("Validating OpenSong XML book…") )

        # Only the "n" (name) attribute is understood on a book element
        bookName = None
        for attributeName,attributeValue in book.items():
            if attributeName != "n":
                logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attributeName, attributeValue ) )
                continue
            bookName = attributeValue

        if not bookName: # nothing to identify the book with
            logging.error( _("OpenSong load can't find a book name") )
            return

        BBB = self.genericBOS.getBBBFromText( bookName ) # Booknames are usually in English
        if not BBB: # wasn't English so try the non-English booknames
            if BibleBooksNames is None: # not loaded yet
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            BBB = BibleBooksNames.getBBBFromText( bookName )
        if not BBB: # neither lookup gave us a book code
            logging.error( _("OpenSong load doesn't recognize book name: {!r}").format( bookName ) )
            return

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("Validating {} {}…").format( BBB, bookName ) )

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'OpenSong XML Bible Book object'
        thisBook.objectTypeString = 'OpenSong'

        # Start the book with its identification and heading lines
        abbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        idText = '{} imported by {}'.format( abbreviation.upper(), ProgNameVersion )
        thisBook.addLine( 'id', idText )
        thisBook.addLine( 'h', bookName )
        thisBook.addLine( 'mt1', bookName )

        # Every subelement of a book is expected to be a chapter
        chapterLocation = "chapter in {}".format( BBB )
        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error( "Expected to find {!r} but got {!r}".format( OpenSongXMLBible.chapterTag, child.tag ) )
                continue
            BibleOrgSysGlobals.checkXMLNoText( child, chapterLocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( child, chapterLocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
Esempio n. 8
0
    def __validateAndExtractBook( self, book ):
        """
        Validate one Zefania XML book element and extract its chapters.

        The "bnumber", "bname" and "bsname" attributes are collected (any other
            attribute is logged as unprocessed).  The book code is looked up
            from the book number if there is one, otherwise from the book name.
            When a book code is found, each chapter subelement is handed to
            __validateAndExtractChapter() and the new BibleBook is passed
            to self.saveBook().
        """
        if Globals.verbosityLevel > 3:
            print( _("Validating XML book...") )

        # Gather the attributes that we know about
        found = { "bnumber":None, "bname":None, "bsname":None }
        for attributeName,attributeValue in book.items():
            if attributeName in found:
                found[attributeName] = attributeValue
            else:
                logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attributeName, attributeValue ) )
        bookNumber, bookName, bookShortName = found["bnumber"], found["bname"], found["bsname"]

        # The book number (if given) takes priority over the book name
        BBB = None
        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                message = "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                logging.warning( message.format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB( bookName )

        if not BBB: # couldn't work out which book this is
            return

        if Globals.verbosityLevel > 2:
            print( _("Validating {} {}...").format( BBB, bookName ) )

        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "Zefania XML Bible Book object"
        thisBook.objectTypeString = "Zefania"

        # Every subelement of a book is expected to be a chapter
        chapterLocation = "chapter in {}".format( BBB )
        for child in book:
            if child.tag != ZefaniaXMLBible.chapterTag:
                logging.error( "Expected to find '{}' but got '{}'".format( ZefaniaXMLBible.chapterTag, child.tag ) )
                continue
            Globals.checkXMLNoText( child, chapterLocation, 'j3jd' )
            Globals.checkXMLNoTail( child, chapterLocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )

        if Globals.verbosityLevel > 2:
            print( "  Saving {} into results...".format( BBB ) )
        self.saveBook( thisBook )
Esempio n. 9
0
    def __validateAndExtractBook( self, book ):
        """
        Validate one Zefania XML book element and extract its chapters.

        The "bnumber", "bname" and "bsname" attributes are collected (any other
            attribute is logged as an error).  The book code is looked up from
            the book number if there is one; if that gives no code, the
            book name is tried instead.  When a book code is found, each chapter
            subelement is handed to __validateAndExtractChapter() and the new
            BibleBook is passed to self.stashBook().
        """
        if BibleOrgSysGlobals.verbosityLevel > 3:
            print( _("Validating XML book…") )

        # Gather the attributes that we know about
        found = { "bnumber":None, "bname":None, "bsname":None }
        for attributeName,attributeValue in book.items():
            if attributeName in found:
                found[attributeName] = attributeValue
            else:
                logging.error( "Unprocessed {!r} attribute ({}) in book element".format( attributeName, attributeValue ) )
        bookNumber, bookName, bookShortName = found["bnumber"], found["bname"], found["bsname"]

        # Try the book number first
        BBB = None
        if bookNumber:
            try:
                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except (KeyError, ValueError):
                message = "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                logging.critical( message.format( bookNumber, bookName, bookShortName ) )
        # Fall back to the book name if the number didn't give us a code
        if bookName and BBB is None:
            BBB = self.genericBOS.getBBBFromText( bookName )

        if not BBB: # couldn't work out which book this is
            return

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("Validating {} {}…").format( BBB, bookName ) )

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'Zefania XML Bible Book object'
        thisBook.objectTypeString = 'Zefania'

        # Every subelement of a book is expected to be a chapter
        chapterLocation = "chapter in {}".format( BBB )
        for child in book:
            if child.tag != ZefaniaXMLBible.chapterTag:
                logging.error( "Expected to find {!r} but got {!r}".format( ZefaniaXMLBible.chapterTag, child.tag ) )
                continue
            BibleOrgSysGlobals.checkXMLNoText( child, chapterLocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( child, chapterLocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
Esempio n. 10
0
    def __validateAndExtractBook( self, book ):
        """
        Validate one OpenSong XML book element and extract its chapters.

        The book name is taken from the element's "n" attribute (any other
            attribute is logged as unprocessed) and looked up with
            self.genericBOS.getBBB().  When a book code is found, a new BibleBook
            is given 'id', 'h' and 'mt1' lines, each chapter subelement is handed
            to __validateAndExtractChapter(), and the book goes to self.saveBook().

        A missing or unrecognised book name is logged as an error and
            nothing is saved.
        """
        if Globals.verbosityLevel > 3:
            print( _("Validating OpenSong XML book...") )

        # Only the "n" (name) attribute is understood on a book element
        bookName = None
        for attributeName,attributeValue in book.items():
            if attributeName != "n":
                logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attributeName, attributeValue ) )
                continue
            bookName = attributeValue

        if not bookName: # nothing to identify the book with
            logging.error( _("OpenSong load can't find a book name") )
            return

        BBB = self.genericBOS.getBBB( bookName )
        if not BBB: # the name didn't give us a book code
            logging.error( _("OpenSong load doesn't recognize book name: '{}'").format( bookName ) )
            return

        if Globals.verbosityLevel > 2:
            print( _("Validating {} {}...").format( BBB, bookName ) )

        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = "OpenSong XML Bible Book object"
        thisBook.objectTypeString = "OpenSong"

        # Start the book with its identification and heading lines
        abbreviation = Globals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        idText = '{} imported by {}'.format( abbreviation.upper(), ProgNameVersion )
        thisBook.appendLine( 'id', idText )
        thisBook.appendLine( 'h', bookName )
        thisBook.appendLine( 'mt1', bookName )

        # Every subelement of a book is expected to be a chapter
        chapterLocation = "chapter in {}".format( BBB )
        for child in book:
            if child.tag != OpenSongXMLBible.chapterTag:
                logging.error( "Expected to find '{}' but got '{}'".format( OpenSongXMLBible.chapterTag, child.tag ) )
                continue
            Globals.checkXMLNoText( child, chapterLocation, 'j3jd' )
            Globals.checkXMLNoTail( child, chapterLocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, child )

        if Globals.verbosityLevel > 2:
            print( "  Saving {} into results...".format( BBB ) )
        self.saveBook( thisBook )
Esempio n. 11
0
    def load( self ):
        """
        Load a single source file and load book elements.

        Opens the SQLite3 database at self.sourceFilepath, copies the first row
            of the Details table into self.settingsDict, and then requests
            every verse (book by book, chapter by chapter) that the
            GENERIC-KJV-66-ENG organisational system says to expect.
            Each verse line is passed to handleLine() and each book that
            had at least one non-blank line is passed to self.saveBook().
            Finally self.doPostLoadProcessing() is called.

        The database connection is closed before returning, including when
            an exception is raised part-way through.  If the Details row has
            neither OT nor NT set, a critical message is logged and nothing
            is loaded.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        # Complain about unexpected filenames (but carry on and try anyway)
        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        try: # so that the connection always gets closed again
            connection.row_factory = sqlite3.Row # Enable row names
            cursor = connection.cursor()

            # First get the settings
            cursor.execute( 'select * from Details' )
            row = cursor.fetchone()
            for key in row.keys():
                self.settingsDict[key] = row[key]
            #print( self.settingsDict ); halt
            if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
            if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
            if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

            # Work out which book to start at and how many books to attempt
            if self.settingsDict['OT'] and self.settingsDict['NT']:
                BBB, booksExpected = 'GEN', 66
            elif self.settingsDict['OT']:
                BBB, booksExpected = 'GEN', 39
            elif self.settingsDict['NT']:
                BBB, booksExpected = 'MAT', 27
            else: # Without either flag we don't know where to start
                logging.critical( "MySwordBible.load: {} has neither OT nor NT set in its details".format( self.sourceFilename ) )
                return

            BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

            # Create the first book
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = "MySword Bible Book object"
            thisBook.objectTypeString = "MySword"

            # verseList has one entry per chapter (being the number of verses in that chapter)
            verseList = BOS.getNumVersesList( BBB )
            numC, numV = len(verseList), verseList[0]
            nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
            C = V = 1

            bookCount = 0
            ourGlobals = { 'haveParagraph':False } # State that's shared with handleLine()
            haveLines = False
            while True:
                cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
                try:
                    row = cursor.fetchone()
                    line = row[0]
                except TypeError: # This reference is missing (row is None)
                    #print( "something wrong at", BBB, C, V )
                    #if BibleOrgSysGlobals.debugFlag: halt
                    #print( row )
                    line = None
                #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
                if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
                else: # line is not None
                    if not isinstance( line, str ):
                        if 'encryption' in self.settingsDict:
                            logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                            break # No point in continuing with an encrypted module
                        else:
                            logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                    elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                    else:
                        haveLines = True

                        # Some modules end lines with \r\n or have it in the middle!
                        #   (We just ignore these for now)
                        while line and line[-1] in '\r\n': line = line[:-1]
                        if '\r' in line or '\n' in line: # (in the middle)
                            logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                        line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

                #print( "MySword.load", BBB, C, V, repr(line) )
                handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

                # Step on to the next verse (which might be in the next chapter or book)
                V += 1
                if V > numV:
                    C += 1
                    if C > numC: # Save this book now
                        if haveLines:
                            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                            self.saveBook( thisBook )
                        #else: print( "Not saving", BBB )
                        bookCount += 1 # Not the number saved but the number we attempted to process
                        if bookCount >= booksExpected: break
                        BBB = BOS.getNextBookCode( BBB )
                        # Create the next book
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = "MySword Bible Book object"
                        thisBook.objectTypeString = "MySword"
                        haveLines = False

                        verseList = BOS.getNumVersesList( BBB )
                        numC, numV = len(verseList), verseList[0]
                        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                        C = V = 1
                        #thisBook.addLine( 'c', str(C) )
                    else: # next chapter only
                        #thisBook.addLine( 'c', str(C) )
                        numV = verseList[C-1]
                        V = 1

                if ourGlobals['haveParagraph']:
                    thisBook.addLine( 'p', '' )
                    ourGlobals['haveParagraph'] = False
            cursor.close()
        finally:
            connection.close()
        self.doPostLoadProcessing()
Esempio n. 12
0
    def loadBook( self, BBB ):
        """
        Read the single book BBB, one verse at a time, out of the SQLite3 database
            and stash the resulting book if any verse text was found.

        Nothing is loaded if BBB is already in self.books
            or if loading it has already been attempted.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("loadBook( {} )").format( BBB ) )
        assert self.preloadDone

        if BBB in self.books: # Nothing to do
            if BibleOrgSysGlobals.debugFlag:
                print( "  {} is already loaded -- returning".format( BBB ) )
            return
        if BBB in self.triedLoadingBook: # Don't try a second time
            logging.warning( "We had already tried loading MySwordBible {} for {}".format( BBB, self.name ) )
            return
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print( _("MySwordBible: Loading {} from {}…").format( BBB, self.sourceFilepath ) )

        # Create the (empty) book
        bookObject = BibleBook( self, BBB )
        bookObject.objectNameString = 'MySword Bible Book object'
        bookObject.objectTypeString = 'MySword'

        # The versification list drives which references get requested:
        #   it has one entry per chapter, giving the last verse number to ask for
        versesPerChapter = self.BOS.getNumVersesList( BBB )
        chapterCount, lastVerse = len(versesPerChapter), versesPerChapter[0]
        bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        verseQuery = 'select Scripture from Bible where Book=? and Chapter=? and Verse=?'
        # Passed to handleLine for every verse -- presumably that's what sets 'haveParagraph'
        lineState = { 'haveParagraph': False }
        foundText = False
        while True:
            self.cursor.execute( verseQuery, (bookNumber,C,V) )
            try: verseText = self.cursor.fetchone()[0]
            except TypeError: verseText = None # This reference is missing (fetchone gave None)

            if verseText is None:
                logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( verseText, str ):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, verseText ) )
                    break # Give up on this book (without stashing it)
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, verseText, self.suppliedMetadata['MySword'] ) )
            elif not verseText:
                logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else:
                foundText = True
                # Some modules end lines with \r\n or have it in the middle!
                #   Trailing ones are dropped, and any in the middle become spaces
                verseText = verseText.rstrip( '\r\n' )
                if '\r' in verseText or '\n' in verseText: # (in the middle)
                    logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                for lineEnding in ( '\r\n', '\r', '\n' ):
                    verseText = verseText.replace( lineEnding, ' ' )

            # NOTE: This is called for every reference, even when verseText is None or blank
            handleLine( self.name, BBB, C, V, verseText, bookObject, lineState )

            V += 1
            if V > lastVerse: # That was the last verse of the chapter
                C += 1
                if C > chapterCount: # That was the last chapter -- save this book now
                    if foundText:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  MySword saving", BBB )
                        self.stashBook( bookObject )
                    break
                lastVerse, V = versesPerChapter[C-1], 1 # Go on to the next chapter

            if lineState['haveParagraph']:
                bookObject.addLine( 'p', '' )
                lineState['haveParagraph'] = False
Esempio n. 13
0
    def load( self ):
        """
        Load all the books out of the SQLite3 database.

        The 'OT' and 'NT' flags in self.suppliedMetadata['MySword'] determine
            what is requested: 66 books starting at GEN if both are set,
            39 books starting at GEN for OT only, or 27 books starting at MAT for NT only.
            Each book is requested verse by verse, and stashed if any verse text was found.

        Afterwards the cursor is closed, the supplied metadata is applied
            and the post-load processing is run.

        Raises ValueError if the metadata has neither the OT nor the NT flag set.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("load()") )
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        def startBook( bookCode ):
            """
            Create an empty book object for bookCode, and return it along with
                the list of verse counts (one per chapter) and the book number used in the database.
            """
            newBook = BibleBook( self, bookCode )
            newBook.objectNameString = 'MySword Bible Book object'
            newBook.objectTypeString = 'MySword'
            return newBook, self.BOS.getNumVersesList( bookCode ), BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( bookCode )
        # end of startBook

        metadata = self.suppliedMetadata['MySword']
        if metadata['OT'] and metadata['NT']: BBB, booksExpected = 'GEN', 66
        elif metadata['OT']: BBB, booksExpected = 'GEN', 39
        elif metadata['NT']: BBB, booksExpected = 'MAT', 27
        else: # Without this branch, BBB would be unbound and we'd fail with an obscure error below
            logging.critical( "MySwordBible.load: {} has neither the OT nor the NT flag set".format( self.sourceFilepath ) )
            raise ValueError( "MySword module {!r} has neither the OT nor the NT flag set".format( self.sourceFilepath ) )

        # Create the first book
        thisBook, verseList, nBBB = startBook( BBB )
        numC, numV = len(verseList), verseList[0]
        C = V = 1

        bookCount = 0
        # Passed to handleLine for every verse -- presumably that's what sets 'haveParagraph'
        ourGlobals = { 'haveParagraph': False }
        haveLines = False
        while True:
            self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try: line = self.cursor.fetchone()[0]
            except TypeError: line = None # This reference is missing (fetchone gave None)

            if line is None:
                logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( line, str ):
                if 'encryption' in metadata:
                    logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                    break # Give up on the whole load (the current book isn't stashed)
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, metadata ) )
            elif not line:
                logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else:
                haveLines = True
                # Some modules end lines with \r\n or have it in the middle!
                #   Trailing ones are dropped, and any in the middle become spaces
                line = line.rstrip( '\r\n' )
                if '\r' in line or '\n' in line: # (in the middle)
                    logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            # NOTE: This is called for every reference, even when line is None or blank
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

            V += 1
            if V > numV: # That was the last verse of the chapter
                C += 1
                if C <= numC: # Go on to the next chapter
                    numV, V = verseList[C-1], 1
                else: # That was the last chapter -- save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  MySword saving", BBB, bookCount+1 )
                        self.stashBook( thisBook )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break

                    # Create the next book
                    BBB = self.BOS.getNextBookCode( BBB )
                    thisBook, verseList, nBBB = startBook( BBB )
                    numC, numV = len(verseList), verseList[0]
                    C = V = 1
                    haveLines = False

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        self.applySuppliedMetadata( 'MySword' ) # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Esempio n. 14
0
    def loadBook(self, BBB):
        """
        Read the single book BBB, one verse at a time, out of the SQLite3
        database and stash the resulting book if any verse text was found.

        Nothing is loaded if BBB is already in self.books or if loading it
        has already been attempted.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("loadBook( {} )").format(BBB))
        assert self.preloadDone

        if BBB in self.books:  # Nothing to do
            if BibleOrgSysGlobals.debugFlag:
                print("  {} is already loaded -- returning".format(BBB))
            return
        if BBB in self.triedLoadingBook:  # Don't try a second time
            alreadyTriedMessage = "We had already tried loading MySwordBible {} for {}".format(
                BBB, self.name)
            logging.warning(alreadyTriedMessage)
            return
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print(_("MySwordBible: Loading {} from {}…").format(
                BBB, self.sourceFilepath))

        # Create the (empty) book
        bookObject = BibleBook(self, BBB)
        bookObject.objectNameString = 'MySword Bible Book object'
        bookObject.objectTypeString = 'MySword'

        # The versification list drives which references get requested:
        #   it has one entry per chapter, giving the last verse number to ask for
        versesPerChapter = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        chapterCount, lastVerse = len(versesPerChapter), versesPerChapter[0]
        bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        verseQuery = 'select Scripture from Bible where Book=? and Chapter=? and Verse=?'
        # Passed to handleRTFLine for every verse -- presumably that's what
        #   sets 'haveParagraph'
        lineState = {'haveParagraph': False}
        foundText = False
        while True:
            self.cursor.execute(verseQuery, (bookNumber, C, V))
            try:
                verseText = self.cursor.fetchone()[0]
            except TypeError:  # This reference is missing (fetchone gave None)
                verseText = None

            if verseText is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            elif not isinstance(verseText, str):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                        .format(BBB, C, V, verseText))
                    break  # Give up on this book (without stashing it)
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                    .format(BBB, C, V, verseText,
                            self.suppliedMetadata['MySword']))
            elif not verseText:
                logging.warning(
                    "MySwordBible.load: Found blank verse line at {} {}:{}"
                    .format(BBB, C, V))
            else:
                foundText = True
                # Some modules end lines with \r\n or have it in the middle!
                #   Trailing ones are dropped, and any in the middle become spaces
                verseText = verseText.rstrip('\r\n')
                if '\r' in verseText or '\n' in verseText:  # (in the middle)
                    logging.warning(
                        "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                        .format(BBB, C, V))
                for lineEnding in ('\r\n', '\r', '\n'):
                    verseText = verseText.replace(lineEnding, ' ')

            # NOTE: This is called for every reference, even when verseText
            #   is None or blank
            handleRTFLine(self.name, BBB, C, V, verseText, bookObject,
                          lineState)

            V += 1
            if V > lastVerse:  # That was the last verse of the chapter
                C += 1
                if C > chapterCount:  # Last chapter done -- save this book now
                    if foundText:
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print("  MySword saving", BBB)
                        self.stashBook(bookObject)
                    break
                # Go on to the next chapter
                lastVerse, V = versesPerChapter[C - 1], 1

            if lineState['haveParagraph']:
                bookObject.addLine('p', '')
                lineState['haveParagraph'] = False
Esempio n. 15
0
    def load( self ):
        """
        Load a single source file and load book elements.

        The tab-separated source file is read once, with the first field of each line
            giving the line type (info, book_name, verse, pericope, parallel, xref or footnote).
            The verses are collected per book, and the headings, cross-references
            and footnotes are collected in dictionaries keyed by their reference.
        Each book is then assembled from those collections and saved,
            and finally the post-load processing is run.

        An unknown line type prints the offending line and then halts.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        def decodeVerse( encodedVerseString ):
            """
            Decodes the verse which has @ format codes.

            Paragraph-level codes become markers with a DOUBLE backslash
                and character-level codes become markers with a SINGLE backslash
                (so that the paragraph markers can be split out separately later).
            Footnote and cross-reference callers become \\ffN and \\xxN placeholders.
            """
            verseString = encodedVerseString
            # A leading @@ simply means that encoding follows (it's removed up to twice)
            if verseString.startswith( '@@' ): verseString = verseString[2:]
            if verseString.startswith( '@@' ): verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace( '@^', '\\\\p ' )
            verseString = verseString.replace( '@0', '\\\\m ' )
            # NOTE: @4 must get the double backslash like the others or else q4 never gets split out as a paragraph
            verseString = verseString.replace( '@1', '\\\\q1 ' ).replace( '@2', '\\\\q2 ' ).replace( '@3', '\\\\q3 ' ).replace( '@4', '\\\\q4 ' )
            verseString = verseString.replace( '@8', '\\\\m ' )
            # Character markers (marked now with single backslash)
            verseString = verseString.replace( '@6', '\\wj ' ).replace( '@5', '\\wj*' )
            verseString = verseString.replace( '@9', '\\add ' ).replace( '@7', '\\add*' ) # or \\i ???
            verseString = re.sub( r'@<f([0-9])@>@/', r'\\ff\1', verseString )
            verseString = re.sub( r'@<x([0-9])@>@/', r'\\xx\1', verseString )
            assert( '@' not in verseString )
            return verseString
        # end of decodeVerse

        def insertNotes( verseString, placeholder, noteDict, BCV, noteStart, noteEnd ):
            """
            Replace each three-character placeholder plus its single-digit caller in verseString
                with the note from noteDict (keyed by BCV plus the caller) wrapped in noteStart and noteEnd.
            """
            while placeholder in verseString:
                pIx = verseString.index( placeholder )
                caller = verseString[pIx+3]
                assert( caller.isdigit() )
                note = noteDict[BCV+(caller,)]
                verseString = verseString[:pIx] + noteStart + note + noteEnd + verseString[pIx+4:]
            return verseString
        # end of insertNotes

        # Read all the lines into bookDict
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(), OrderedDict(), {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = ''
        lastBBB = bookLines = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines

                bits = line.split( '\t' )
                lineType = bits[0]
                if lineType == 'info':
                    assert( len(bits) == 3 )
                    infoField, infoValue = bits[1], bits[2]
                    if infoField == 'shortName':
                        self.name = infoValue
                    elif infoField in ('longName','description'):
                        pass # Recognised but not currently used
                    elif infoField == 'locale': # Only checked, not currently used
                        assert( 2 <= len(infoValue) <= 3 )
                    else: # (bookNumberString etc. are from the most recent verse/note line, if any)
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(infoField), BBB, bookNumberString, chapterNumberString, verseNumberString ) )
                elif lineType == 'book_name':
                    assert( 3 <= len(bits) <= 4 )
                    thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bits[1] )
                    bookNameDict[thisBBB] = bits[2], (bits[3] if len(bits)==4 else '')
                elif lineType == 'verse':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    if BBB != lastBBB: # We have a new book
                        if lastBBB is not None: # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert( BBB in bookNameDict )
                        bookLines = OrderedDict() # Keys are (C,V) strings
                    bookLines[(chapterNumberString,verseNumberString)] = decodeVerse( encodedVerseString ) # Just store it for now
                    lastBBB = BBB
                elif lineType == 'pericope':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    headingString = encodedHeadingString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    assert( '@' not in headingString )
                    headingDict[(BBB,chapterNumberString,verseNumberString)] = headingString, [] # Blank refList
                elif lineType == 'parallel': # These lines optionally follow pericope lines
                    assert( len(bits) == 2 )
                    # Add to the refList of the heading at the most recently read reference
                    headingDict[(BBB,chapterNumberString,verseNumberString)][1].append( bits[1] )
                elif lineType == 'xref':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    noteString = re.sub( r'@<ta(.+?)@>', r'', noteString ) # Get rid of these encoded BCV references for now
                    noteString = re.sub( r'@<to(.+?)@>', r'', noteString ) # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace( '@/', '' )
                    assert( '@' not in noteString )
                    xrefDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                elif lineType == 'footnote':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    assert( '@' not in noteString )
                    footnoteDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                else: print( "YETBible: Unknown line type", self.givenName, BBB, bookNumberString, chapterNumberString, verseNumberString, len(bits), bits ); halt
        if lastBBB is not None: # (there's nothing to save if the file had no verse lines)
            bookDict[lastBBB] = bookLines # Save the last book

        # Now process the books
        for BBB,bkData in bookDict.items():
            thisBook = BibleBook( self.name, BBB )
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,verseNumberString), verseString in bkData.items():
                BCV = (BBB,chapterNumberString,verseNumberString)

                # Insert headings (can only occur before verses)
                if BCV in headingDict:
                    heading, refList = headingDict[BCV]
                    thisBook.appendLine( 's', heading )
                    if refList:
                        thisBook.appendLine( 'r', '(' + '; '.join( refList ) + ')' )

                # Insert footnotes and cross-references
                verseString = insertNotes( verseString, '\\ff', footnoteDict, BCV, '\\f + \\ft ', '\\f*' )
                verseString = insertNotes( verseString, '\\xx', xrefDict, BCV, '\\x - \\xt ', '\\x*' )

                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumberString = chapterNumberString
                if verseString.startswith( '\\\\' ):  # It's an initial paragraph marker
                    # The marker is either one or two characters long and is followed by a space
                    if verseString[3]==' ': marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4]==' ': marker, verseString = verseString[2:4], verseString[5:]
                    else: halt
                    thisBook.appendLine( marker, '' )
                assert( not verseString.startswith( '\\\\' ) )
                bits = verseString.split( '\\\\' ) # Split on paragraph markers (but not character markers)
                for j,bit in enumerate(bits):
                    if j==0: # Only the text before the first paragraph marker belongs on the verse line
                        thisBook.appendLine( 'v', verseNumberString + ' ' + bit.rstrip() )
                    else: # Each later bit starts with its one or two character marker and a space
                        if bit[1]==' ': marker, bit = bit[0], bit[2:]
                        elif bit[2]==' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        thisBook.appendLine( marker, bit.rstrip() )
            self.saveBook( thisBook )
        self.doPostLoadProcessing()
Esempio n. 16
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BibleFilenameEndingsToAccept[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute('select * from Details')
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(
                self.settingsDict['Description']) < 40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, self.settingsDict['encryption']))

        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.settingsDict:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                            .format(BBB, C, V, repr(line)))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                            .format(BBB, C, V, repr(line), self.settingsDict))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("Saving", BBB, bookCount + 1)
                        self.saveBook(thisBook)
                    #else: print( "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList(BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(
                        BBB)
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
        cursor.close()
        self.doPostLoadProcessing()
Esempio n. 17
0
    def load( self ):
        """
        Load a single DrupalBible source file and stash each book found in it.

        The file is processed line by line in three phases (tracked by status):
            0 = header: after the '*Bible' first line, each 'shortName|fullName|language'
                    line sets self.name, until a '*Chapter' line is found
            1 = book list: 'bookCode|fullName|shortName|numChapters' lines,
                    until a '*Context' line is found
            2 = verse data: 'bookCode|chapter|verse|lineMark|verseText' lines.
        Blank lines and lines starting with '#' are discarded.

        A book is stashed each time the book code changes, and the final book
            is stashed at the end (only if at least one verse line was found).
        In verse text, '<' and '>' are converted to USFM italic markers.

        Raises ValueError if a data line doesn't have the expected number of
            '|'-separated fields, and AssertionError if a book's short name
            differs from its book code.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        status = 0 # 0 = getting header, 1 = getting chapters (book list), 2 = getting verse data
        lineCount = 0
        BBB = lastBBB = None
        thisBook = None # Stays None if the file contains no verse lines at all
        lastChapterNumberString = None
        bookDetails = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1:
                    if line[0]==chr(65279): #U+FEFF
                        logging.info( "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                        line = line[1:] # Remove the (correctly decoded) Unicode Byte Order Marker (BOM)
                    elif line[:3] == '\xef\xbb\xbf': # 0xEF,0xBB,0xBF
                        # These are the three UTF-8 BOM bytes as they appear when the file
                        #   was decoded with a single-byte encoding (they display as 'ï»¿')
                        logging.info( "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
                        line = line[3:] # Remove the UTF-8 Unicode Byte Order Marker (BOM)
                if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines

                if line[0] == '#': continue # Just discard comment lines
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning( "Unknown DrupalBible first line: {}".format( repr(line) ) )

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else: # Get the version name details
                        bits = line.split( '|' )
                        # Only fullName is used, but the unpacking also enforces exactly three fields
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else: # Get the book name details
                        bits = line.split( '|' )
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert bookShortName == bookCode
                        BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                        BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                        bookDetails[BBB] = bookFullName, bookShortName, numChapters

                elif status == 2: # Get the verse text
                    bits = line.split( '|' )
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    # NOTE(review): `halt` isn't defined in this method -- presumably a deliberate
                    #   NameError so that an unhandled (non-empty) lineMark stops the load
                    if lineMark: print( repr(lineMark) ); halt
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                    BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB: # We've started a new book
                        if lastBBB is not None: # Save the previous book first
                            self.stashBook( thisBook )
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = 'DrupalBible Bible Book object'
                        thisBook.objectTypeString = 'DrupalBible'
                        lastChapterNumberString = None # Forces a 'c' line for the first verse of the new book
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.addLine( 'c', chapterNumberString )
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace( '<', '\\it ' ).replace( '>', '\\it*' )
                    thisBook.addLine( 'v', verseNumberString + ' ' + verseText )

                else: halt # status is only ever 0, 1 or 2, so this shouldn't be reachable

        # Save the final book (there's none if the file never reached any verse lines)
        if thisBook is not None: self.stashBook( thisBook )
        self.doPostLoadProcessing()
Esempio n. 18
0
    def load( self ):
        """
        Load a single verse-per-line (VPL) source file and load book elements.

        Line 1 of the file is used to detect the flavour of the file (vplType):
            1 = 'bookCode C:V text' lines (2-5 word characters, whitespace, then
                    chapter and verse digits separated by ':' or '.')
            2 = lines starting with an 8-digit reference followed by whitespace
            3 = files whose first line is a '# language_name:' header line.
        If line 1 matches none of these, it is skipped and vplType stays None.

        For type 3, recognised '# key:' header lines are collected into a settings
            dictionary, which is stored as self.suppliedMetadata['VPL'] at the end.
        Verse lines of types 2 and 3 are split at the first tab: the reference is
            sliced into a 2-digit book number, a 3-digit chapter number, and
            the remaining digits as the verse number.

        A new BibleBook is created each time the book code changes (the previous
            one is stashed first), a 'c' line is added each time the chapter number
            changes, and each verse becomes a 'v' line.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        # The Bible organisational systems are module-level globals that are only created once
        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        # NOTE(review): lastLine and metadataName are assigned but never read in the active code below
        lastLine, lineCount = '', 0
        vplType = bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1 # -1 means "none seen yet"
        lastVText = ''
        thisBook = None
        settingsDict = {} # Only gets filled from the header lines of type 3 files
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                # NOTE(review): the type detection only runs for physical line 1, so if that
                #   line is blank (discarded above), vplType is never set for this file
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type (trying each pattern in turn)
                    match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                    if match: vplType = 1
                    else:
                        match = re.search( '^(\\d{8})\\s', line )
                        if match: vplType = 2
                        else:
                            match = re.search( '^# language_name:\\s', line )
                            if match: vplType = 3
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                    else: # Unrecognised first line: report it and skip it (vplType remains None)
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue

                lastLine = line

                # Process header stuff
                #   Each recognised header has its value saved (unless it's empty or 'Not available')
                #   and then we go straight on to the next line
                if vplType == 3:
                    if   line.startswith( '# language_name:' ):
                        string = line[16:].strip()
                        if string and string != 'Not available': settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith( '# closest ISO 639-3:' ):
                        string = line[20:].strip()
                        if string and string != 'Not available': settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith( '# year_short:' ):
                        string = line[13:].strip()
                        if string and string != 'Not available': settingsDict['Year.short'] = string
                        continue
                    elif line.startswith( '# year_long:' ):
                        string = line[12:].strip()
                        if string and string != 'Not available': settingsDict['Year.long'] = string
                        continue
                    elif line.startswith( '# title:' ):
                        string = line[8:].strip()
                        if string and string != 'Not available': settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith( '# URL:' ):
                        string = line[6:].strip()
                        if string and string != 'Not available': settingsDict['URL'] = string
                        continue
                    elif line.startswith( '# copyright_short:' ):
                        string = line[18:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith( '# copyright_long:' ):
                        string = line[17:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.long'] = string
                        continue
                    elif line[0]=='#':
                        logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                        continue # Just discard comment lines
                # (No header handling exists for any other vplType)

                # Process the main segment
                if vplType == 1:
                    # Expecting three space-separated fields: bookCode, C:V, and the verse text
                    bits = line.split( ' ', 2 )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCode, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                    else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                    # NOTE(review): there's no `continue` after the message above, so processing carries on
                    #   with whatever values were left over from the previous line. Also, if this happens
                    #   on the first verse line, chapterNumberString and verseNumberString aren't bound yet.
                    #   (The type detection regex accepts '.' between C and V, but only ':' is accepted here.)

                    if not bookCode and not chapterNumberString and not verseNumberString:
                        print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        continue
                    if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCode) <= 4
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit(): # Log it and skip the whole line
                        logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int( chapterNumberString )
                    verseNumber = int( verseNumberString )

                    if bookCode != lastBookCode: # We've started a new book
                        # A few abbreviations are mapped explicitly,
                        #   otherwise each organisational system is asked in turn
                        if bookCode in ('Le',): BBB = 'LEV'
                        elif bookCode in ('Jud',): BBB = 'JDG'
                        elif bookCode in ('So',): BBB = 'SNG'
                        elif bookCode in ('La',): BBB = 'LAM'
                        else:
                            BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0]=='«':
                        if BBB=='PSA' and verseNumberString=='1': # Psalm title
                            vBits = vText[1:].split( '»' )
                            # NOTE(review): if this line is also the first line of the book, thisBook still
                            #   refers to the previous book here (the new book object is only created
                            #   further down) -- confirm whether that can happen in practice
                            thisBook.addLine( 'd', vBits[0] ) # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                        # Move Psalm titles to verse zero
                        #   (Only verseNumber is changed: the 'v' line added below still uses verseNumberString)
                        verseNumber = 0

                elif vplType in (2,3):
                    # Expecting the reference digits, then a tab, then the verse text
                    bits = line.split( '\t', 1 )
                    # The reference is sliced up as BBCCCVVV (verse is whatever follows the first five characters)
                    bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                    while len(chapterNumberString)>1 and chapterNumberString[0]=='0':
                        chapterNumberString = chapterNumberString[1:] # Remove leading zeroes
                    while len(verseNumberString)>1 and verseNumberString[0]=='0':
                        verseNumberString = verseNumberString[1:] # Remove leading zeroes
                    # Note that bookCode is an integer for these types (but a string for type 1)
                    bookCode, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                    # Remove the spaces before closing punctuation and after opening punctuation
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[')

                    if bookCode != lastBookCode: # We've started a new book
                        # Book numbers 1..66 are looked up by reference number,
                        #   and higher numbers come from this table (a KeyError is raised if not listed)
                        bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                                85:'PSS', 86:'ODE', }
                        if 1 <= bookCode <= 66: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookCode )
                        else: BBB = bnDict[bookCode]

                else: # vplType is None (first line wasn't recognised) or some other unsupported value
                    logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

                # Now save what we found for this line (this part is common to all the VPL types)
                if bookCode:
                    if bookCode != lastBookCode: # We've started a new book
                        if lastBookCode != -1: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            # Only the number of chapters is used below (numVerses isn't read again)
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            # NOTE(review): lastBookCode isn't updated in this case, so every following line
                            #   with the same unresolved book code comes back through the stashBook() call above
                            logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCode ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            # Unusual chapter numbers are only logged -- the 'c' line is added regardless
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        #   An exact duplicate (same verse number and same text) is the only case that's really skipped
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            continue
                        # NOTE(review): despite the "Ignored" wording, the next two cases only log a warning
                        #   and the verse line is still added below.
                        #   (The inner vText==lastVText case can't be true here because of the `continue` above.)
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        #   (A leading pilcrow becomes a 'p' line before the verse)
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCode yet
                    logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        #   (Pass on any metadata that was collected from the header lines)
        if settingsDict:
            # NOTE(review): this None check repeats the one done near the top of this method
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Esempio n. 19
0
    def load( self ):
        """
        Read the EasyWorship file at self.sourceFilepath and stash its books.

        The file is read fully into memory as bytes and walked with a running index:
            a 32-byte block holding the format signature,
            a 56-byte block holding the module name,
            66 book records of 51+157+8+8 bytes each
                (abbreviation, chapter/verse counts, start offset, length),
            a length-prefixed block which (if non-empty) is zlib-compressed
                text that replaces the module name,
            then whatever lies before the first book's compressed data.
        Each book's bytes are then decompressed with zlib, split into
            "C:V text" lines, and loaded into a BibleBook which is stashed.

        Returns a dictionary of the raw blocks that were seen
            but not otherwise interpreted.
        """
        import zlib

        def readPaddedString( data, offset ):
            """
            Gather up to 32 characters from data beginning at offset,
                stopping at the first byte value below 0x20.

            Returns a 2-tuple: the text, and the position at which the scan stopped.
            """
            collected = ''
            for position in range( 0, 32 ):
                byteValue = data[offset+position]
                if byteValue < 0x20: break
                collected += chr( byteValue )
            return collected, position
        # end of readPaddedString

        # Translations where particular books are known to have verses out of order
        #   (these just get reported rather than checked)
        knownDisorderedBooks = {
            'rsv': ('EXO','HAG',),
            'gnt': ('ISA','ZEC','MRK',),
            'hcsb': ('SA2',),
            'msg': ('NUM','JDG','SA2','CH2','EZE','ACT',),
            }

        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
        with open( self.sourceFilepath, 'rb' ) as sourceFile: # Closed again as soon as we've read it
            fileBytes = sourceFile.read()
        if BibleOrgSysGlobals.debugFlag: print( "  {:,} bytes read".format( len(fileBytes) ) )

        keep = {}
        index = 0

        # First block: the format signature
        keep['block1'] = fileBytes[index:index+32]
        hString, stoppedAt = readPaddedString( fileBytes, index )
        if BibleOrgSysGlobals.debugFlag: print( 'block1b', hexlify( fileBytes[index+stoppedAt:index+32] ) )
        # The rest of this block is binary that we don't interpret
        index += 32
        if BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
        assert hString == 'EasyWorship Bible Text'

        # Second block: the module name (followed by zeroes)
        keep['block2'] = fileBytes[index:index+56]
        nString = readPaddedString( fileBytes, index )[0]
        index += 56
        if BibleOrgSysGlobals.debugFlag: print( 'nString', repr(nString), index )
        self.name = nString

        # Now the table of 66 book records
        rawBooks = []
        for bookNumber in range( 1, 66+1 ):
            bookAbbrev = readPaddedString( fileBytes, index )[0]
            index += 51 # Step over the abbreviation field (and its zero padding)
            if bookAbbrev.endswith( '.' ): bookAbbrev = bookAbbrev[:-1] # Drop a final period
            if BibleOrgSysGlobals.verbosityLevel > 2: print( 'bookAbbrev', repr(bookAbbrev) )

            # One byte for the number of chapters, then one byte per chapter for the number of verses
            numChapters = fileBytes[index]
            numVerses = [fileBytes[index+chapterOffset+1] for chapterOffset in range( 0, numChapters )]
            index += 157 # Step over the counts field (and its zero padding)
            if BibleOrgSysGlobals.debugFlag:
                print( ' ', numChapters, numVerses )

            # Little-endian 32-bit start offset (in an 8-byte field)
            bookStart = struct.unpack( "<I", fileBytes[index:index+4] )[0]
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print( '  bookStart', bookStart )

            # Little-endian 32-bit length (in an 8-byte field)
            bookLength = struct.unpack( "<I", fileBytes[index:index+4] )[0]
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print( '  bookLength', bookLength, bookStart+bookLength )

            bookBytes = fileBytes[bookStart:bookStart+bookLength]
            assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header
            rawBooks.append( (bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes) )

        # A length-prefixed block comes next
        if BibleOrgSysGlobals.debugFlag: print( 'unknown block3', index, hexlify( fileBytes[index:index+30] ) )
        keep['block3'] = fileBytes[index:index+30]
        length3 = struct.unpack( "<I", fileBytes[index:index+4] )[0]
        if length3:
            # The length includes its own four bytes
            block3 = fileBytes[index+4:index+length3]
            moduleTitle = zlib.decompress( block3 ).decode( 'utf8' )
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( "Got", len(moduleTitle), moduleTitle, 'from', length3 )
            keep['block3n'] = moduleTitle
            if self.name: print( 'Overwriting module name {!r} with {!r}'.format( self.name, moduleTitle ) )
            self.name = moduleTitle
        index += length3
        if BibleOrgSysGlobals.debugFlag: print( 'end of contents', index, hexlify( fileBytes[index:index+60] ) )

        # Everything from here up to the first book's data is kept uninterpreted
        firstBookStart = rawBooks[0][3]
        keep['block4'] = firstBookStart
        block5 = fileBytes[index:firstBookStart]
        keep['block5'] = block5
        index += len( block5 )
        assert index == firstBookStart # Should now be at the start of the first book (already fetched above)

        assert len(rawBooks) == 66
        # Whatever follows the last book's data is also kept
        endBytes = fileBytes[bookStart+bookLength:]
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
        assert len(endBytes) == 16
        keep['block9'] = endBytes
        del fileBytes # Each book's bytes are already held in rawBooks

        # Decompress and load each book in turn
        for bookIndex, BBB in enumerate( BOS.getBookList() ):
            if BibleOrgSysGlobals.verbosityLevel > 2: print( '  Decoding {}…'.format( BBB ) )
            bookRecord = rawBooks[bookIndex]
            bookAbbrev, bookBytes = bookRecord[0], bookRecord[5]
            textResult = zlib.decompress( bookBytes ).decode( 'utf8' )
            if '\t' in textResult:
                logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
                textResult = textResult.replace( '\t', ' ' )
            if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

            currentC = currentV = '0'
            for line in textResult.split( '\r\n' ):
                if not line: continue # Nothing on this line
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
                assert line[0].isdigit()
                assert ':' in line[:4]
                reference, verseText = line.split( ' ', 1 )
                chapterField, verseField = reference.split( ':' )

                if chapterField != currentC: # Starting another chapter
                    if self.abbreviation=='hcsb' and BBB=='SA2': # Handle a bad bug -- chapter 24 has verses out of order
                        print( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                    else: assert int(chapterField) > int(currentC)
                    currentC, currentV = chapterField, '0'
                    thisBook.addLine( 'c', currentC )

                if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                    if int(verseField) < int(currentV): break
                elif BBB in knownDisorderedBooks.get( self.abbreviation, () ):
                    print( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                else:
                    try: assert int(verseField) > int(currentV)
                    except ValueError:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "Something's not an integer around {} {}:{} {}".format( BBB, currentC, currentV, verseText ) )
                currentV = verseField
                thisBook.addLine( 'v', currentV + ' ' + verseText )

            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
            self.stashBook( thisBook )

        self.doPostLoadProcessing()
        return keep
Esempio n. 20
0
    def load( self ):
        """
        Load a single source file and load book elements.

        Reads self.sourceFilepath (decoded with self.encoding) one line at a time.
        Blank lines and lines starting with '#' are discarded.
        Every other line is expected to have three space-separated parts:
            bookCode C:V verseText
        A new BibleBook is started each time the book code changes,
            and each finished book is passed to self.saveBook().
        In the verse text, [..] is converted to \\add, <..> to \\wj,
            and a leading «..» on verse 1 of a Psalm becomes a 'd' line.

        Lines that don't have the expected parts are reported and skipped,
            as are lines for a book code that can't be identified.
        Nothing is saved if the file contained no usable lines.

        Finishes by calling self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lineCount = 0
        BBB = None
        thisBook = None # Stays None until we recognise a book code
        # These are preset so that the report for a malformed line can always display them
        bookCode = chapterNumberString = verseNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                    line = line[1:] # Remove the Byte Order Marker
                # (endswith is used because the line might be empty now if it only contained a BOM)
                if line.endswith( '\n' ): line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                #print ( 'VLP file line is "' + line + '"' )
                if line[0]=='#': continue # Just discard comment lines

                bits = line.split( ' ', 2 )
                #print( self.givenName, BBB, bits )
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split( ':' )
                else:
                    print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                    continue # Don't fall through and process the previous line's fields again

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
                if not verseNumberString.isdigit():
                    logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert( verseNumberString.isdigit() )
                    continue
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookCode != lastBookCode: # We've started a new book
                    if thisBook is not None: # Better save the last book (once only)
                        self.saveBook( thisBook )
                        thisBook = None
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB( bookCode )  # Try to guess
                    if BBB:
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = "VPL Bible Book object"
                        thisBook.objectTypeString = "VPL"
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical( "VPLBible could not figure out {!r} book code".format( bookCode ) )
                        if BibleOrgSysGlobals.debugFlag: halt
                        lastBookCode = -1 # So that whatever book code comes next gets looked up afresh
                if thisBook is None: continue # Unknown book -- there's nowhere to put this line

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle special formatting
                #   [brackets] are for Italicized words
                #   <brackets> are for the Words of Christ in Red
                #   «brackets»  are for the Titles in the Book  of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0]=='«':
                    #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                    if BBB=='PSA' and verseNumberString=='1': # Psalm title
                        vBits = vText[1:].split( '»' )
                        #print( "vBits", vBits )
                        thisBook.addLine( 'd', vBits[0] ) # Psalm title
                        vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                # NOTE: The next warnings only report the problem -- the verse line still gets added below
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (if we ever started one)
        if thisBook is not None: self.saveBook( thisBook )
        self.doPostLoadProcessing()
Esempio n. 21
0
    def load(self):
        """
        Load a single source file and load book elements.

        Reads self.sourceFilepath (decoded with self.encoding) one line at a
        time. Blank lines and lines starting with '#' are discarded. Every
        other line is expected to have three space-separated parts:
            bookCode C:V verseText
        A new BibleBook is started each time the book code changes, and each
        finished book is passed to self.saveBook(). In the verse text, [..]
        is converted to \\add, <..> to \\wj, and a leading «..» on verse 1 of
        a Psalm becomes a 'd' line.

        Lines that don't have the expected parts are reported and skipped, as
        are lines for a book code that can't be identified. Nothing is saved
        if the file contained no usable lines.

        Finishes by calling self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lineCount = 0
        BBB = None
        thisBook = None  # Stays None until we recognise a book code
        # Preset so the report for a malformed line can always display them
        bookCode = chapterNumberString = verseNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and self.encoding.lower(
                ) == 'utf-8' and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)"
                    )
                    line = line[1:]  # Remove the Byte Order Marker
                # endswith is used because the line might be empty by now
                #   (if it only contained a BOM)
                if line.endswith('\n'):
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                #print ( 'VLP file line is "' + line + '"' )
                if line[0] == '#': continue  # Just discard comment lines

                bits = line.split(' ', 2)
                #print( self.givenName, BBB, bits )
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split(
                        ':')
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    # Don't fall through and process the previous line's
                    #   fields again
                    continue

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    assert (2 <= len(bookCode) <= 4)
                if BibleOrgSysGlobals.debugFlag:
                    assert (chapterNumberString.isdigit())
                if not verseNumberString.isdigit():
                    logging.error(
                        "Invalid verse number field at {}/{} {}:{!r}".format(
                            bookCode, BBB, chapterNumberString,
                            verseNumberString))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        assert (verseNumberString.isdigit())
                    continue
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookCode != lastBookCode:  # We've started a new book
                    if thisBook is not None:
                        # Better save the last book (once only)
                        self.saveBook(thisBook)
                        thisBook = None
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBB(
                        bookCode)  # Try to guess
                    if BBB:
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = "VPL Bible Book object"
                        thisBook.objectTypeString = "VPL"
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical(
                            "VPLBible could not figure out {!r} book code".
                            format(bookCode))
                        if BibleOrgSysGlobals.debugFlag: halt
                        # Make sure that whatever book code comes next
                        #   gets looked up afresh
                        lastBookCode = -1
                if thisBook is None:
                    continue  # Unknown book -- nowhere to put this line

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle special formatting
                #   [brackets] are for Italicized words
                #   <brackets> are for the Words of Christ in Red
                #   «brackets»  are for the Titles in the Book  of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0] == '«':
                    #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                    if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                        vBits = vText[1:].split('»')
                        #print( "vBits", vBits )
                        thisBook.addLine('d', vBits[0])  # Psalm title
                        vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                # NOTE: The next warnings only report the problem -- the
                #   verse line still gets added below
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (if we ever started one)
        if thisBook is not None:
            self.saveBook(thisBook)
        self.doPostLoadProcessing()
Esempio n. 22
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search( '^; TITLE:\\s', line )
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type {!r} match from {!r}".format( match.group(0), line ) )
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}".format( firstLine, thisFilename ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue

                #print ( 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith( '; TITLE:' ):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith( '; ABBREVIATION:' ):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith( '; HAS ITALICS' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES:' ):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS REDLETTER' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0]==';':
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}".format( line ) )
                    continue # Just discard comment lines

                # Process the main segment
                if line.startswith( '$$ ' ):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #print( "pointer", repr(pointer) )
                    if pointer and pointer[0]=='{' and pointer[-1]=='}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #print( "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else: # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split( ' ', 1 )
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split( ':' )
                            chapterNumber = int( chapterNumberString )
                            verseNumber = int( verseNumberString )
                            if bookCode != lastBookCode: # We've started a new book
                                if bookCode in ('Ge',): BBB = 'GEN'
                                elif bookCode in ('Le',): BBB = 'LEV'
                                elif bookCode in ('La',): BBB = 'LAM'
                                ##elif bookCode in ('Es',): BBB = 'EST'
                                ##elif bookCode in ('Pr',): BBB = 'PRO'
                                #elif bookCode in ('So',): BBB = 'SNG'
                                #elif bookCode in ('La',): BBB = 'LAM'
                                #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #print( "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess
                                    #print( "4BBB =", repr(BBB) )
                        else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                    continue # Just save the pointer information which refers to the text on the next line
                else: # it's not a $$ line
                    text = line
                    #print( "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                        vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                        #if '\\' in vText: print( 'ForgeForSwordSearcher vText', repr(vText) )
                        #print( BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search( '\\{(.+?)\\}', vText )
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                            vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\{(.+?)\\}', vText )
                        # Convert [stuff] to added fields
                        match = re.search( '\\[(.+?)\\]', vText )
                        while match:
                            addText = '\\add {}\\add*'.format( match.group(1) )
                            vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\[(.+?)\\]', vText )
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search( '\\+r/(.+?)-r/', vText )
                        while match:
                            addText = '\\wj {}\\wj*'.format( match.group(1) )
                            vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search( '\\+r/(.+?)-r/', vText )
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning( "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                                break


                if bookCode:
                    if bookCode != lastBookCode: # We've started a new book
                        if lastBookCode != -1: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "ForgeForSwordSearcherBible could not figure out {!r} book code".format( bookCode ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCode yet
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}".format( line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            #print( "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata( 'Forge4SS' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
    def load( self ):
        """
        Load the compressed data file and import book objects.

        The whole of self.sourceFilepath is read as bytes and then walked as
            a series of fixed-size blocks:
                a 32-byte intro block,
                a 56-byte module name block,
                66 book information blocks of 224 bytes each
                    (optional book name, chapter/verse counts, and the
                     start offset and length of the compressed book data),
                a length-prefixed, zlib-compressed work name block.
            The expected layout is checked with assert statements as we go.

        Each non-empty book is then decompressed with zlib, decoded as UTF-8,
            split into 'C:V text' lines, and stashed as a BibleBook.

        Sets self.name (and self.workName if a work name block is present).

        Returns an OrderedDict containing the raw pieces of the file that
            were examined (mostly as (index,value) 2-tuples).
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("\nLoading {}…").format( self.sourceFilepath ) )
        with open( self.sourceFilepath, 'rb' ) as myFile: # Automatically closes the file when done
            fileBytes = myFile.read()
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            print( "  {:,} bytes read".format( len(fileBytes) ) )

        keep = OrderedDict()
        index = 0

        # Block 1 is 32-bytes long and always the same for EW2009 Bibles
        #if debuggingThisModule: print( 'introBlock', hexlify( fileBytes[index:index+32] ), fileBytes[index:index+32] )
        keep['introBlock'] = (index,fileBytes[index:index+32])
        hString = ''
        for j in range( 0, 32 ): # Collect the printable characters at the start of the block
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            hString += chr( char8 )
        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag: print( 'hString', repr(hString), index )
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
            assert hString == 'EasyWorship Bible Text'
        introBlockb = fileBytes[index+j:index+32] # j was left at the first non-printable byte
        #if BibleOrgSysGlobals.debugFlag: print( 'introBlockb', hexlify( introBlockb ), introBlockb )
        assert introBlockb == b'\x1a\x02<\x00\x00\x00\xe0\x00\x00\x00' # b'1a023c000000e0000000'
        # Skipped some (important?) binary here??? but it's the same for every module
        index += 32

        # Block 2 is 56-bytes long
        moduleNameBlock = fileBytes[index:index+56]
        keep['moduleNameBlock'] = (index,moduleNameBlock)
        #if debuggingThisModule: print( 'moduleNameBlock', hexlify( moduleNameBlock ), moduleNameBlock )
        nString = ''
        for j in range( 0, 32 ):
            char8 = fileBytes[index+j]
            #print( char8, repr(char8) )
            if char8 < 0x20: break
            nString += chr( char8 )
        #if BibleOrgSysGlobals.debugFlag or debuggingThisModule: print( 'nString', repr(nString), index )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            # Report the name that we're about to set (not the previous value of self.name)
            print( "EasyWorshipBible.load: " + _("Setting module name to {!r}").format( nString ) )
        self.name = nString
        #assert self.name # Not there for amp and gkm
        #moduleNameBlockb = fileBytes[index+j:index+56]
        #if BibleOrgSysGlobals.debugFlag: print( 'moduleNameBlockb', len(moduleNameBlockb), hexlify( moduleNameBlockb ), moduleNameBlockb )
        for ix in range( index+j, index+56 ): # Mostly zeroes remaining
            if ix == 84: # What does this mean???
                value = fileBytes[ix]
                assert value in (0,1,2,3,4,5) # bbe=0, alb=1, esv2=2, esv=3, asv=4 nasb=5 Revision number???
                keep['byte84'] = (index,value) # NOTE: Records the start of the block (index) here, not ix
            else: assert fileBytes[ix] == 0
        index += 56

        # Get the optional booknames and the raw data for each book into a list
        #   Each entry is 51 + 157 + 8 + 8 = 224 bytes long
        rawBooks = []
        for bookNumber in range( 1, 66+1 ):
            bookInfoBlock = fileBytes[index:index+51]
            blockName = 'bookInfoBlock-{}'.format( bookNumber )
            keep[blockName] = (index,bookInfoBlock)
            #if debuggingThisModule: print( blockName, hexlify( bookInfoBlock ), bookInfoBlock )
            bookName = ''
            for j in range( 0, 32 ):
                char8 = fileBytes[index+j]
                #print( char8, repr(char8) )
                if char8 < 0x20: break # bookName seems quite optional -- maybe the English ones are assumed if empty???
                bookName += chr( char8 )
            assert fileBytes[index+j:index+51] == b'\x00' * (51-j) # Skipped some zeroes here
            index += 51
            if bookName and bookName[-1] == '.': bookName = bookName[:-1] # Remove final period
            #if debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 2:
                #print( 'bookName', repr(bookName) )
            numChapters = fileBytes[index] # One byte for the chapter count, then one byte per chapter for the verse counts
            numVerses = []
            for j in range( 0, numChapters ):
                numVerses.append( fileBytes[index+j+1] )
            #print( "here1", 157-j-2, hexlify(fileBytes[index+j+2:index+157]), fileBytes[index+j+2:index+157] )
            if self.abbreviation != 'fn1938': # Why does this fail???
                assert fileBytes[index+j+2:index+157] == b'\x00' * (157-j-2) # Skipped some zeroes here
            index += 157
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( ' {!r} numChapters={} verses={}'.format( bookName, numChapters, numVerses ) )
            bookStart, = struct.unpack( "<I", fileBytes[index:index+4] ) # Little-endian unsigned 32-bit
            assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here
            index += 8
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( '    bookStart is at {:,}'.format( bookStart ) )
            bookLength, = struct.unpack( "<I", fileBytes[index:index+4] )
            assert fileBytes[index+4:index+8] == b'\x00' * 4 # Skipped some zeroes here
            index += 8
            #if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
                #print( '    {} bookLength is {:,} which goes to {:,}'.format( bookNumber, bookLength, bookStart+bookLength ) )
            bookBytes = fileBytes[bookStart:bookStart+bookLength] # Looking ahead into the file
            rawBooks.append( (bookName, numChapters, numVerses, bookStart, bookLength, bookBytes) )
            if bookLength == 0: # e.g., gkm Philippians (book number 50)
                logging.critical( "Booknumber {} is empty in {}".format( bookNumber, self.abbreviation ) )
            else:
                #if debuggingThisModule:
                    #print( "cHeader1 for {}: {}={} {}={}".format( self.abbreviation, bookBytes[0], hexlify(bookBytes[0:1]), bookBytes[1], hexlify(bookBytes[1:2]) ) )
                assert bookBytes[0]==0x78 and bookBytes[1]==0xda # Zlib compression header (for compression levels 7-9)
        assert index == 14872 # 32 + 56 + 224*66

        workNameBlock = fileBytes[index:index+30] # 30 here is just a maximum, not fixed
        keep['workNameBlock'] = (index,workNameBlock) # This block starts with a length, then a work name, e.g., ezFreeASV
        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            #print( 'workNameBlock', index, hexlify(workNameBlock), workNameBlock )
        length3, = struct.unpack( "<I", fileBytes[index:index+4] )
        #print( "length3", length3 ) # Seems to include the compressed string plus six more bytes
        keep['length3'] = (index,length3)
        if length3:
            bookInfoBlock = fileBytes[index+4:index+4+length3-4-6] # The compressed work name (without the length or the appendage)
            if debuggingThisModule:
                print( "cHeader2 for {}: {}={} {}={}".format( self.abbreviation, bookInfoBlock[0], hexlify(bookInfoBlock[0:1]), bookInfoBlock[1], hexlify(bookInfoBlock[1:2]) ) )
            assert bookInfoBlock[0]==0x78 and bookInfoBlock[1]==0xda # Zlib compression header (for compression levels 7-9)
            byteResult = zlib.decompress( bookInfoBlock )
            textResult = byteResult.decode( 'utf8' )
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( "Block4: Got {} chars {!r} from {} bytes".format( len(textResult), textResult, length3 ) )
            assert textResult.startswith('ezFree') or textResult.startswith('ezPaid')
            keep['workName'] = (index+4,textResult)
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print( "EasyWorshipBible.load: " + _("Setting module work name to {!r}").format( textResult ) )
            if self.name: self.workName = textResult
            else: # Should rarely happen
                self.name = self.workName = textResult
            workNameAppendage = fileBytes[index+4+length3-6-4:index+4+length3-4] # The six bytes following the compressed name
            #print( "workNameAppendage", len(workNameAppendage), hexlify(workNameAppendage), workNameAppendage )
            keep['workNameAppendage'] = (index+4+length3-6-4,workNameAppendage)
            assert workNameAppendage[:4] == b'QK\x03\x04'
            uncompressedNameLength, = struct.unpack( "<B", workNameAppendage[4:5] )
            assert workNameAppendage[5:] == b'\x00'
            assert len(textResult) == uncompressedNameLength
            #print( self.abbreviation, len(textResult), repr(textResult), 'length3', length3, len(textResult)+18 )
            # This check needs textResult so it must stay inside this block
            #   (otherwise a zero length3 gives an UnboundLocalError rather than being skipped)
            assert length3 == len(textResult) + 18
        index += length3

        bookDataStartIndex = rawBooks[0][3] # bookStart of the first book
        #print( "bookDataStartIndex", bookDataStartIndex )

        #if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            #print( 'After known contents @ {:,}'.format( index ), hexlify( fileBytes[index:index+60] ), fileBytes[index:index+60] )

        block0080 = fileBytes[index:bookDataStartIndex]
        #print( "block0080", index, len(block0080), hexlify(block0080), block0080 )
        keep['block0080'] = (index,block0080)
        assert block0080 == b'\x00\x00\x08\x00' # b'00000800'
        index += len( block0080 )
        keep['bookDataStartIndex'] = (index,bookDataStartIndex)
        assert index == bookDataStartIndex # Should now be at the start of the first book (already fetched above)

        # Look at extra stuff right at the end of the file
        assert len(rawBooks) == 66
        index = bookStart + bookLength # of the last book
        endBytes = fileBytes[index:]
        #if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            #print( 'endBytes', len(endBytes), hexlify(endBytes), endBytes )
        assert len(endBytes) == 16
        keep['endBytes'] = (index,endBytes)
        assert endBytes == b'\x18:\x00\x00\x00\x00\x00\x00ezwBible' # b'183a000000000000657a774269626c65'
        del fileBytes # Not needed any more

        # Now we have to decode the book text (compressed about 4x with zlib)
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "EWB loading books for {}…".format( self.abbreviation ) )
        for j, BBB in enumerate( BOS.getBookList() ):
            bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[j]
            if bookLength == 0:
                assert not bookBytes
                logging.critical( "   Skipped empty {}".format( BBB ) )
                continue
            if BibleOrgSysGlobals.verbosityLevel > 2: print( '  Decoding {}…'.format( BBB ) )
            # The last ten bytes of each book aren't part of the compressed stream
            bookBytes, bookExtra = bookBytes[:-10], bookBytes[-10:]
            assert len(bookExtra) == 10
            keep['bookExtra-{}'.format(j+1)] = (-10,bookExtra)
            assert bookExtra[:4] == b'QK\x03\x04'
            uncompressedBookLength, = struct.unpack( "<I", bookExtra[4:8] )
            assert bookExtra[8:] == b'\x08\x00'
            byteResult = zlib.decompress( bookBytes )
            assert len(byteResult) == uncompressedBookLength
            try: textResult = byteResult.decode( 'utf8' )
            except UnicodeDecodeError:
                logging.critical( "Unable to decode {} {} bookText -- maybe it's not utf-8???".format( self.abbreviation, BBB ) )
                continue
            if debuggingThisModule: # Check that we can round-trip the compression
                rewriteResult1 = zlib.compress( byteResult, 9 )
                byteResult1 = zlib.decompress( rewriteResult1 )
                if rewriteResult1 != bookBytes:
                    print( "\nbookBytes", len(bookBytes), hexlify(bookBytes) )
                    print( "\nrewriteResult1", len(rewriteResult1), hexlify(rewriteResult1) )
                    halt
                if byteResult1 != byteResult:
                    print( len(byteResult), hexlify(byteResult) )
                    print( len(byteResult1), hexlify(byteResult1) )
                    halt
            if '\t' in textResult:
                logging.warning( "Replacing tab characters in {} = {}".format( BBB, bookAbbrev ) )
                textResult = textResult.replace( '\t', ' ' )
            #print( textResult )
            if BibleOrgSysGlobals.strictCheckingFlag: assert '  ' not in textResult

            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine( 'toc3', bookAbbrev )

            C, V = '-1', '-1' # So first/id line starts at -1:0
            for line in textResult.split( '\r\n' ):
                if not line: continue # skip blank lines
                #if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    #print( 'Processing {} {} line: {!r}'.format( self.abbreviation, BBB, line ) )
                assert line[0].isdigit()
                assert ':' in line[:4]
                CV,verseText = line.split( ' ', 1 ) # Each line is 'C:V verseText'
                newC,newV = CV.split( ':' )
                #print( newC, V, repr(verseText) )
                if newC != C:
                    if self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bad bug -- chapter 24 has verses out of order
                        logging.critical( "Skipping error for out-of-order chapters in {}!".format( BBB ) )
                    else: assert int(newC) > int(C)
                    C, V = newC, '0'
                    thisBook.addLine( 'c', C )
                # The following modules have known ordering problems so the verse order check is skipped for them
                if self.abbreviation=='TB' and BBB=='JOL': # Handle a bug -- chapter 3 repeats
                    if int(newV) < int(V): break
                elif self.abbreviation=='drv' and BBB in ('GEN','EXO','NUM',): # Handle a bug -- Gen 18:1&12, Exo 28:42&43 out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='rsv' and BBB in ('EXO','HAG',): # Handle a bug -- chapter 22 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='gnt' and BBB in ('ISA','ZEC','MRK',): # Handle a bug -- chapter 38 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='hcsb' and BBB in ('SA2',): # Handle a bug -- chapter 24 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                elif self.abbreviation=='msg' and BBB in ('NUM','JDG','SA2','CH2','EZE','ACT',): # Handle a bug -- chapter 24 has verses out of order
                    logging.critical( "Skipping error for out-of-order verses in {} {}".format( self.abbreviation, BBB ) )
                else:
                    try: assert int(newV) > int(V)
                    except ValueError:
                        logging.critical( "Something's not an integer around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
                    except AssertionError:
                        logging.critical( "Something's out of order around {} {} {}:{} {}".format( self.abbreviation, BBB, C, V, verseText ) )
                V = newV
                thisBook.addLine( 'v', V + ' ' + verseText )

            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB )
            self.stashBook( thisBook )

        self.doPostLoadProcessing()
        return keep
Esempio n. 24
0
    def load( self ):
        """
        Load a single source file and load book elements.

        Opens self.sourceFilepath as a SQLite database,
            copies the first row of the Details table into self.settingsDict
                (setting self.name and self.abbreviation from it if available),
            then requests each verse in turn from the Bible table
                (stepping through the GENERIC-KJV-66-ENG books, chapters, and verses)
            and passes each one to self.handleLine.
        Books that contained at least one non-blank line are passed to self.saveBook.

        Any problems noticed with the testament settings, first book, or row count
            are saved in self.errorDictionary['Load Errors'].
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        loadErrors = []

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


        # Just get some information from the file
        cursor.execute( 'select * from Bible' )
        rows = cursor.fetchall()
        numRows = len(rows)
        if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
        BBBn1 = rows[0][0] # Book number from the first row
        if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
        del rows # Takes a lot of memory
        BBB1 = None
        if BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )


        # Work out what we expect to find from the testament flags in the settings
        testament = BBB = None
        booksExpected = textLineCountExpected = 0
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957
        elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['Apocrypha']: # incomplete
            testament, BBB = 'AP', 'XXX'
            booksExpected, textLineCountExpected = 99, 999999
            halt
        if not BBB:
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
        if Globals.debugFlag or Globals.verbosityLevel>2:
            print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
        if BBB1 != BBB:
            logging.critical( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            loadErrors.append( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            if not BBB: BBB = BBB1 # Fall back to whatever the file starts with
        if numRows != textLineCountExpected:
            logging.critical( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
            loadErrors.append( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
        #halt

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "e-Sword Bible Book object"
        thisBook.objectTypeString = "e-Sword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except (TypeError, sqlite3.Error): # This reference is missing (fetchone gave None) or couldn't be fetched
                # NOTE: Deliberately not a bare except so that KeyboardInterrupt, etc. still get through
                #print( "something wrong at", BBB, C, V )
                #if Globals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'e-Sw file line is "' + line + '"' )
            if line is None: logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                        break
                elif not line: logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        if Globals.debugFlag:
                            logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                        #print( repr(line) )
                    while line and line[-1] in '\r\n': line = line[:-1] # Remove CR/LFs from the end
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

            #print( "e-Sword.load", BBB, C, V, repr(line) )
            self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals ) # NOTE: Also called for missing (None) lines
            V += 1
            if V > numV: # Finished this chapter
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self.name, BBB )
                    thisBook.objectNameString = "e-Sword Bible Book object"
                    thisBook.objectTypeString = "e-Sword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.appendLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.appendLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']: # Flag is read here after the handleLine call above and then reset
                thisBook.appendLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        if Globals.strictCheckingFlag or Globals.debugFlag: self.checkForExtraMaterial( cursor, BOS )
        cursor.close()
        connection.close() # Closing the cursor alone leaves the database file open
        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        self.doPostLoadProcessing()
Esempio n. 25
0
    def load(self):
        """
        Load a single DrupalBible source file and stash every book found in it.

        The file at self.sourceFilepath is read line by line using
        self.encoding.  A leading byte order marker is removed from the first
        line, and blank lines and lines starting with '#' are skipped.  The
        first line is expected to be '*Bible' (a warning is logged otherwise).
        The remaining lines are handled according to `status`:
            0 -- version details 'shortName|fullName|language'
                    (ended by a '*Chapter' line)
            1 -- book details 'code|fullName|shortName|numChapters'
                    (ended by a '*Context' line)
            2 -- verse lines 'code|chapter|verse|lineMark|text'

        Each completed book is handed to self.stashBook() and
        self.doPostLoadProcessing() is called once at the end.

        Raises:
            ValueError: if a '|'-separated line does not have the expected
                number of fields (from the tuple unpacking).
            AssertionError: if a book line's short name differs from its code.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        status = 0  # 0 = getting version details, 1 = getting chapters, 2 = getting verse data
        lineCount = 0
        BBB = lastBBB = None
        # Bound up front so that a file without any verse lines cannot cause
        #   a NameError when the final book is stashed below.
        thisBook = None
        lastChapterNumberString = None
        bookDetails = {}  # Filled in below but not read again inside this method
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1:
                    if line.startswith(chr(65279)):  #U+FEFF
                        logging.info(
                            "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[1:]  # Remove the decoded BOM character
                    elif line.startswith('\xef\xbb\xbf'):  # 0xEF,0xBB,0xBF
                        # This is what a UTF-8 BOM looks like if the file
                        #   was decoded with a single-byte encoding
                        logging.info(
                            "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[3:]  # Remove the three BOM characters
                # endswith() is safe even if removing the BOM left nothing
                if line.endswith('\n'):
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if line[0] == '#': continue  # Just discard comment lines

                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning(
                            "Unknown DrupalBible first line: {}".format(
                                repr(line)))

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else:  # Get the version name details
                        shortName, fullName, language = line.split('|')
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else:  # Get the book name details
                        bookCode, bookFullName, bookShortName, numChapters = line.split(
                            '|')
                        assert bookShortName == bookCode
                        BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                            bookCode)
                        # Result can be string or list of strings (best guess first)
                        BBB = BBBresult if isinstance(BBBresult,
                                                      str) else BBBresult[0]
                        bookDetails[
                            BBB] = bookFullName, bookShortName, numChapters

                elif status == 2:  # Get the verse text
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = line.split(
                        '|')
                    if lineMark:
                        print(repr(lineMark))
                        # NOTE(review): `halt` is not defined in this method;
                        #   unless it is defined elsewhere, evaluating it
                        #   raises NameError and aborts the load -- confirm
                        #   that stopping on a non-empty line mark is intended.
                        halt
                    BBBresult = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                        bookCode)
                    # Result can be string or list of strings (best guess first)
                    BBB = BBBresult if isinstance(BBBresult,
                                                  str) else BBBresult[0]
                    if BBB != lastBBB:  # We've started a new book
                        if thisBook is not None:  # Better save the last book
                            self.stashBook(thisBook)
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'DrupalBible Bible Book object'
                        thisBook.objectTypeString = 'DrupalBible'
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumberString = chapterNumberString
                    # Angle brackets in the verse text become \it ... \it* markers
                    verseText = verseText.replace('<', '\\it ').replace(
                        '>', '\\it*')
                    thisBook.addLine('v', verseNumberString + ' ' + verseText)

                else:
                    # Not reachable while status is only ever set to 0, 1 or 2
                    halt

        # Save the final book (if the file contained any verses at all)
        if thisBook is not None:
            self.stashBook(thisBook)
        self.doPostLoadProcessing()
Esempio n. 26
0
    def load( self ):
        """
        Load a single CSV source file and save every book found in it.

        The file at self.sourceFilepath is read line by line using
        self.encoding.  Blank lines, lines consisting of a single space, and
        lines starting with '#' are skipped.  If the first line starts with
        '"Book",' or 'Book,' it is treated as a header line (which also
        decides whether surrounding quote marks get removed from the fields).

        Every other line is split on the first three commas into
            bookNumber, chapterNumber, verseNumber, verseText
        and lines that don't give four fields are logged and skipped.
        A number of RTF escape codes in the verse text are converted to the
        corresponding characters before the verse is added to the book.

        Each completed book is handed to self.saveBook() and
        self.doPostLoadProcessing() is called once at the end.

        Raises:
            ValueError: if the book, chapter or verse field is not an integer.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lineCount = 0
        BBB = None
        thisBook = None # Bound here so an empty file can't cause a NameError at the final save
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line.endswith( '\n' ): line = line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if line==' ': continue # Handle special case which has blanks on every second line -- HACK
                if line[0]=='#': continue # Just discard comment lines
                if lineCount==1:
                    if line.startswith( '"Book",' ):
                        quoted = True
                        continue # Just discard header line
                    elif line.startswith( 'Book,' ):
                        quoted = False
                        continue # Just discard header line

                bits = line.split( ',', 3 )
                if len(bits) != 4:
                    # Skip the line rather than fall through and reprocess the
                    #   field values left over from the previous line
                    logging.error( "Unexpected number of bits in {} {} line {}: {} {}".format( self.givenName, BBB, lineCount, len(bits), bits ) )
                    continue
                bString, chapterNumberString, verseNumberString, vText = bits

                # Remove quote marks from these strings
                if quoted:
                    bString, chapterNumberString, verseNumberString, vText = (
                        field[1:-1] if len(field)>=2 and field[0]==field[-1] and field[0] in '"\'' else field
                        for field in (bString, chapterNumberString, verseNumberString, vText) )

                bookNumber = int( bString )
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookNumber != lastBookNumber: # We've started a new book
                    if thisBook is not None: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )  # Try to guess
                    assert( BBB )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                # First do special characters
                vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
                vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
                # Now do Unicode characters
                #   (the text is searched again from the start after each replacement)
                while True: # Find patterns like \\'d3
                    match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                while True: # Find patterns like \\u253?
                    match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]

                # Handle the verse info
                #   (the book number fills the slot where these messages formerly
                #    referenced a bookCode variable that is never defined here)
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                # Only an exact duplicate is dropped (above) -- in the cases
                #   below a warning is logged but the verse is still added
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (if the file contained any data lines at all)
        if thisBook is not None:
            self.saveBook( thisBook )
        self.doPostLoadProcessing()
Esempio n. 27
0
    def load(self):
        """
        Load a single CSV source file and save every book found in it.

        The file at self.sourceFilepath is read line by line using
        self.encoding.  Blank lines, lines consisting of a single space, and
        lines starting with '#' are skipped.  If the first line starts with
        '"Book",' or 'Book,' it is treated as a header line (which also
        decides whether surrounding quote marks get removed from the fields).

        Every other line is split on the first three commas into
            bookNumber, chapterNumber, verseNumber, verseText
        and lines that don't give four fields are logged and skipped.
        A number of RTF escape codes in the verse text are converted to the
        corresponding characters before the verse is added to the book.

        Each completed book is handed to self.saveBook() and
        self.doPostLoadProcessing() is called once at the end.

        Raises:
            ValueError: if the book, chapter or verse field is not an integer.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lineCount = 0
        BBB = None
        # Bound here so an empty file can't cause a NameError at the final save
        thisBook = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line.endswith('\n'):
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if line == ' ':
                    continue  # Handle special case which has blanks on every second line -- HACK
                if line[0] == '#': continue  # Just discard comment lines
                if lineCount == 1:
                    if line.startswith('"Book",'):
                        quoted = True
                        continue  # Just discard header line
                    elif line.startswith('Book,'):
                        quoted = False
                        continue  # Just discard header line

                bits = line.split(',', 3)
                if len(bits) != 4:
                    # Skip the line rather than fall through and reprocess the
                    #   field values left over from the previous line
                    logging.error(
                        "Unexpected number of bits in {} {} line {}: {} {}".
                        format(self.givenName, BBB, lineCount, len(bits),
                               bits))
                    continue
                bString, chapterNumberString, verseNumberString, vText = bits

                # Remove quote marks from these strings
                if quoted:
                    bString, chapterNumberString, verseNumberString, vText = (
                        field[1:-1] if len(field) >= 2
                        and field[0] == field[-1] and field[0] in '"\'' else
                        field for field in (bString, chapterNumberString,
                                            verseNumberString, vText))

                bookNumber = int(bString)
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookNumber != lastBookNumber:  # We've started a new book
                    if thisBook is not None:  # Better save the last book
                        self.saveBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumber)  # Try to guess
                    assert (BBB)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookNumber,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                # First do special characters
                vText = vText.replace('\\ldblquote', '“').replace(
                    '\\rdblquote', '”').replace('\\lquote',
                                                '‘').replace('\\rquote', '’')
                vText = vText.replace('\\emdash', '—').replace('\\endash', '–')
                # Now do Unicode characters
                #   (the text is searched again from the start after each replacement)
                while True:  # Find patterns like \\'d3
                    match = re.search(r"\\'[0-9a-f][0-9a-f]", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end()],
                            16)  # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]
                while True:  # Find patterns like \\u253?
                    match = re.search(r"\\u[1-2][0-9][0-9]\?", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end() -
                                  1])  # Convert three digits to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]

                # Handle the verse info
                #   (the book number fills the slot where these messages formerly
                #    referenced a bookCode variable that is never defined here)
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookNumber,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                # Only an exact duplicate is dropped (above) -- in the cases
                #   below a warning is logged but the verse is still added
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookNumber,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB,
                                   bookNumber, chapterNumberString,
                                   verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookNumber, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (if the file contained any data lines at all)
        if thisBook is not None:
            self.saveBook(thisBook)
        self.doPostLoadProcessing()
Esempio n. 28
0
    def load( self ):
        """
        Load a single Unbound Bible source file and save every book found in it.

        The file at self.sourceFilepath is read line by line using
        self.encoding.  Blank lines are skipped.  Lines starting with '#' are
        treated as comments, except that a '#key<TAB>value' line with a
        non-empty value is stored as an attribute of self if the key is one
        of name, filetype, copyright, abbreviation, language, note or columns
        (other keys are reported with a warning).

        Every other line is split on tabs and must have 4, 6 or 9 fields:
            4: bookCode, chapter, verse, text
            6: bookCode, chapter, verse, subverse, sequence, text
            9: as for six fields, but preceded by NRSVA bookCode, chapter, verse
        Verses whose text is empty (after stripping spaces) or is just '+'
        are dropped.

        Each completed book is handed to self.saveBook() and
        self.doPostLoadProcessing() is called once at the end.

        Raises:
            ValueError: if a chapter, verse or sequence field is not an integer.
            AssertionError: if one of the sanity checks on the fields fails.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        metadataFieldNames = ( 'name', 'filetype', 'copyright', 'abbreviation', 'language', 'note', 'columns', )
        lineCount = 0
        BBB = None
        thisBook = None # Bound here so a file without verses can't cause a NameError at the final save
        # Bound here because the bad-line messages below use them
        #   and the very first data line could be one of those bad lines
        bookCode = chapterNumberString = verseNumberString = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line.endswith( '\n' ): line = line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if line[0]=='#':
                    hashBits = line[1:].split( '\t' )
                    if len(hashBits)==2 and hashBits[1]: # We have some valid meta-data
                        # Should some of these be placed into self.settingsDict???
                        if hashBits[0] in metadataFieldNames:
                            setattr( self, hashBits[0], hashBits[1] )
                        else: # Only complain about the fields that we don't know
                            logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( hashBits[0], hashBits[1] ) )
                    continue # Just discard comment lines

                bits = line.split( '\t' )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith( 'lxx_a_parsing_' ):
                    # The reference shown here is that of the last good line
                    logging.warning( _("Skipping bad {!r} line in {} {} {} {}:{}").format( line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                else:
                    print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )
                    # NOTE(review): `halt` is not defined in this method;
                    #   unless it is defined elsewhere, evaluating it raises
                    #   NameError and aborts the load -- confirm that is intended.
                    halt

                if NRSVA_bookCode: assert( len(NRSVA_bookCode) == 3 )
                if NRSVA_chapterNumberString: assert( NRSVA_chapterNumberString.isdigit() )
                if NRSVA_verseNumberString: assert( NRSVA_verseNumberString.isdigit() )

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert( len(bookCode) == 3 )
                if BibleOrgSysGlobals.debugFlag: assert( chapterNumberString.isdigit() )
                if BibleOrgSysGlobals.debugFlag: assert( verseNumberString.isdigit() )

                if subverseNumberString:
                    logging.warning( _("subverseNumberString {!r} in {} {} {}:{}").format( subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString ) )

                vText = vText.strip() # Remove leading and trailing spaces
                if not vText: continue # Just ignore blank verses I think
                if vText == '+': continue # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag: assert( sequenceNumberString.isdigit() )
                    sequenceNumber = int( sequenceNumberString )
                    if BibleOrgSysGlobals.debugFlag: assert( sequenceNumber > lastSequence or \
                        self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) ) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode: # We've started a new book
                    if thisBook is not None: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode( bookCode )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'Unbound Bible Book object'
                    thisBook.objectTypeString = 'Unbound'
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                # Only an exact duplicate is dropped (above) -- in the cases
                #   below a warning is logged but the verse is still added
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (if the file contained any verses at all)
        if thisBook is not None:
            self.saveBook( thisBook )
        self.doPostLoadProcessing()
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search('^; TITLE:\\s', line)
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print("First line got type {!r} match from {!r}".
                                  format(match.group(0), line))
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                                .format(firstLine, thisFilename))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue

                #print ( 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith('; TITLE:'):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith('; ABBREVIATION:'):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith('; HAS ITALICS'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES:'):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS REDLETTER'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0] == ';':
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                    continue  # Just discard comment lines

                # Process the main segment
                if line.startswith('$$ '):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #print( "pointer", repr(pointer) )
                    if pointer and pointer[0] == '{' and pointer[-1] == '}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #print( "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else:  # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split(' ', 1)
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split(
                                ':')
                            chapterNumber = int(chapterNumberString)
                            verseNumber = int(verseNumberString)
                            if bookCode != lastBookCode:  # We've started a new book
                                if bookCode in ('Ge', ): BBB = 'GEN'
                                elif bookCode in ('Le', ): BBB = 'LEV'
                                elif bookCode in ('La', ):
                                    BBB = 'LAM'
                                    ##elif bookCode in ('Es',): BBB = 'EST'
                                    ##elif bookCode in ('Pr',): BBB = 'PRO'
                                    #elif bookCode in ('So',): BBB = 'SNG'
                                    #elif bookCode in ('La',): BBB = 'LAM'
                                    #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #print( "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText(
                                        bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOS81.getBBBFromText(
                                            bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOSx.getBBBFromText(
                                            bookCode)  # Try to guess
                                    #print( "4BBB =", repr(BBB) )
                        else:
                            print("Unexpected number of bits", self.givenName,
                                  BBB, bookCode, chapterNumberString,
                                  verseNumberString, len(bits), bits)
                    continue  # Just save the pointer information which refers to the text on the next line
                else:  # it's not a $$ line
                    text = line
                    #print( "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else
                                             '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace('(<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>)', '\\x*')
                        vText = vText.replace('<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>', '\\x*')
                        #if '\\' in vText: print( 'ForgeForSwordSearcher vText', repr(vText) )
                        #print( BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search('\\{(.+?)\\}', vText)
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                                chapterNumber, verseNumber, match.group(1))
                            vText = vText[:match.start(
                            )] + footnoteText + vText[
                                match.end():]  # Replace this footnote
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\{(.+?)\\}', vText)
                        # Convert [stuff] to added fields
                        match = re.search('\\[(.+?)\\]', vText)
                        while match:
                            addText = '\\add {}\\add*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\[(.+?)\\]', vText)
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search('\\+r/(.+?)-r/', vText)
                        while match:
                            addText = '\\wj {}\\wj*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #print( BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\+r/(.+?)-r/', vText)
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning(
                                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                                    .format(BBB, chapterNumberString,
                                            verseNumberString, vText))
                                break

                if bookCode:
                    if bookCode != lastBookCode:  # We've started a new book
                        if lastBookCode != -1:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "ForgeForSwordSearcherBible could not figure out {!r} book code"
                                .format(bookCode))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCode,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCode yet
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                        .format(line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #print( "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata(
                'Forge4SS')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Esempio n. 30
0
    def load(self):
        """
        Load a single tab-separated Unbound Bible source file and stash its books.

        Lines starting with '#' are treated as comments/metadata: a line of the
            form "#name<TAB>value" with a non-empty value is saved into
            self.suppliedMetadata['Unbound'][name].
        Every other non-blank line is split on tabs and must have 4, 6 or 9
            fields (the verse text is always the last field).
        A new BibleBook is started whenever the book code changes, a 'c' line
            is added whenever the chapter number changes, and each verse
            is added to the current book as a 'v' line.

        Nothing is returned -- finished books are handed to self.stashBook(),
            then self.applySuppliedMetadata( 'Unbound' ) and
            self.doPostLoadProcessing() are called.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['Unbound'] = {}

        BBB = None
        thisBook = None  # No book has been started yet
        # These get their real values when the first data line is unpacked,
        #   but the diagnostic messages below may need to display them before then
        bookCode = chapterNumberString = verseNumberString = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                if line and line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines

                if line[0] == '#':
                    hashBits = line[1:].split('\t')
                    if len(hashBits) == 2 and hashBits[
                            1]:  # We have some valid meta-data
                        self.suppliedMetadata['Unbound'][
                            hashBits[0]] = hashBits[1]
                    continue  # Just discard comment lines

                # Split the data line into its tab-separated fields
                bits = line.split('\t')
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith(
                        'lxx_a_parsing_'):
                    # The reference displayed here is the one from the previous data line (if any)
                    logging.warning(
                        _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                    continue
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    # NOTE(review): 'halt' is not defined in this method -- presumably it is
                    #   deliberately undefined so that this line aborts the load with a NameError -- confirm
                    halt

                # Note that these three checks are not conditional on the debug flag
                if NRSVA_bookCode: assert len(NRSVA_bookCode) == 3
                if NRSVA_chapterNumberString:
                    assert NRSVA_chapterNumberString.isdigit()
                if NRSVA_verseNumberString:
                    assert NRSVA_verseNumberString.isdigit()

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag: assert len(bookCode) == 3
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumberString.isdigit()
                if BibleOrgSysGlobals.debugFlag:
                    assert verseNumberString.isdigit()

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString {!r} in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode,
                            chapterNumberString, verseNumberString))

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText: continue  # Just ignore blank verses I think
                if vText == '+':
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumberString.isdigit()
                    sequenceNumber = int(sequenceNumberString)
                    if BibleOrgSysGlobals.debugFlag:
                        # Sequence numbers should increase, except in these particular modules
                        assert sequenceNumber > lastSequence or self.givenName in (
                            'gothic_latin',
                            'hebrew_bhs_consonants',
                            'hebrew_bhs_vowels',
                            'latvian_nt',
                            'ukrainian_1871',
                        )  # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if thisBook is not None:  # Better save the last book
                        self.stashBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(
                        bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'Unbound Bible Book object'
                    thisBook.objectTypeString = 'Unbound'
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    # An exact repeat of the previous verse line really is dropped
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    #   (only affects the checks below -- the 'v' line still uses verseNumberString)
                    verseNumber = 0
                # The remaining cases are only reported -- the verse line is still added below
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (there won't be one if the file had no verse lines)
        if thisBook is not None: self.stashBook(thisBook)
        self.applySuppliedMetadata('Unbound')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Esempio n. 31
0
    def load(self):
        """
        Read the whole binary module file, pick apart its header and book table,
            then decompress each book with zlib and stash it as a BibleBook.

        The file must start with the text 'EasyWorship Bible Text'.
        The module name is taken from the second header block (and is replaced
            by the decompressed contents of block 3 if that block is non-empty).
        66 book table entries are then read, each giving a book abbreviation,
            the chapter/verse counts, and the start and length of the compressed book data.
        Each decompressed book is expected to consist of CRLF-separated lines
            of the form "C:V verse text".

        Returns a dict of the pieces of the file that were fetched along the way
            (keys 'block1', 'block2', 'block3', 'block3n' (only if block 3 is
            non-empty), 'block4', 'block5' and 'block9').
        """
        import zlib

        def fetchText(data, startOffset, maxLength=32):
            """
            Convert successive bytes of data (from startOffset) to characters,
                stopping at the first byte below 0x20 or after maxLength bytes.
            """
            characters = []
            for position in range(startOffset, startOffset + maxLength):
                byteValue = data[position]
                if byteValue < 0x20: break
                characters.append(chr(byteValue))
            return ''.join(characters)

        # Modules with known out-of-order verses (they skip the verse order check below)
        knownVerseOrderProblems = {
            'rsv': ('EXO', 'HAG'),
            'gnt': ('ISA', 'ZEC', 'MRK'),
            'hcsb': ('SA2', ),
            'msg': ('NUM', 'JDG', 'SA2', 'CH2', 'EZE', 'ACT'),
        }

        if BibleOrgSysGlobals.verbosityLevel > 1:
            print(_("\nLoading {}…").format(self.sourceFilepath))
        with open(self.sourceFilepath, 'rb') as binaryFile:
            fileBytes = binaryFile.read()
        if BibleOrgSysGlobals.debugFlag:
            print("  {:,} bytes read".format(len(fileBytes)))

        keep = {}
        index = 0

        # Block 1 (32 bytes) starts with the file type string
        keep['block1'] = fileBytes[index:index + 32]
        hString = fetchText(fileBytes, index)
        if BibleOrgSysGlobals.debugFlag:
            # Display from the byte that ended the text (or from the last byte if all 32 were text)
            stopOffset = min(len(hString), 31)
            print('block1b', hexlify(fileBytes[index + stopOffset:index + 32]))
        # Skipped some (important?) binary here
        index += 32
        if BibleOrgSysGlobals.debugFlag: print('hString', repr(hString), index)
        assert hString == 'EasyWorship Bible Text'

        # Block 2 (56 bytes) starts with the module name
        keep['block2'] = fileBytes[index:index + 56]
        nString = fetchText(fileBytes, index)
        # Skipped some zeroes here
        index += 56
        if BibleOrgSysGlobals.debugFlag: print('nString', repr(nString), index)
        self.name = nString

        # Next is the table of 66 book entries
        rawBooks = []
        for bookNumber in range(1, 66 + 1):
            bookAbbrev = fetchText(fileBytes, index)
            # Skipped some zeroes here
            index += 51
            if bookAbbrev.endswith('.'):
                bookAbbrev = bookAbbrev[:-1]  # Remove final period
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print('bookAbbrev', repr(bookAbbrev))

            # One byte for the chapter count, then one byte per chapter for the verse counts
            numChapters = fileBytes[index]
            numVerses = [
                fileBytes[index + 1 + chapterIndex]
                for chapterIndex in range(numChapters)
            ]
            # Skipped some zeroes here
            index += 157
            if BibleOrgSysGlobals.debugFlag:
                print(' ', numChapters, numVerses)

            # Two little-endian 32-bit values (each field is 8 bytes wide)
            bookStart = struct.unpack("<I", fileBytes[index:index + 4])[0]
            # Skipped some zeroes here
            index += 8
            if BibleOrgSysGlobals.debugFlag:
                print('  bookStart', bookStart)
            bookLength = struct.unpack("<I", fileBytes[index:index + 4])[0]
            # Skipped some zeroes here
            index += 8
            bookEnd = bookStart + bookLength
            if BibleOrgSysGlobals.debugFlag:
                print('  bookLength', bookLength, bookEnd)

            bookBytes = fileBytes[bookStart:bookEnd]
            assert bookBytes[
                0] == 0x78 and bookBytes[1] == 0xda  # Zlib compression header
            rawBooks.append((bookAbbrev, numChapters, numVerses, bookStart,
                             bookLength, bookBytes))

        # Block 3 starts with its own length (which includes the four length bytes)
        if BibleOrgSysGlobals.debugFlag:
            print('unknown block3', index,
                  hexlify(fileBytes[index:index + 30]))
        keep['block3'] = fileBytes[index:index + 30]
        length3 = struct.unpack("<I", fileBytes[index:index + 4])[0]
        if length3:
            block3 = fileBytes[index + 4:index + length3]
            textResult = zlib.decompress(block3).decode('utf8')
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print("Got", len(textResult), textResult, 'from', length3)
            keep['block3n'] = textResult
            if self.name:
                print('Overwriting module name {!r} with {!r}'.format(
                    self.name, textResult))
            self.name = textResult
        index += length3
        if BibleOrgSysGlobals.debugFlag:
            print('end of contents', index,
                  hexlify(fileBytes[index:index + 60]))
        firstBookStart = rawBooks[0][3]
        keep['block4'] = firstBookStart

        # Block 5 is whatever lies between here and the first book
        block5 = fileBytes[index:firstBookStart]
        keep['block5'] = block5
        index += len(block5)
        assert index == firstBookStart  # Should now be at the start of the first book (already fetched above)

        assert len(rawBooks) == 66
        # Look at extra stuff at end (i.e., after the last book in the table)
        endBytes = fileBytes[bookStart + bookLength:]
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print('endBytes', len(endBytes), hexlify(endBytes), endBytes)
        assert len(endBytes) == 16
        keep['block9'] = endBytes
        # Skipped some binary and some text here
        del fileBytes

        # Now we have to decode the book text (compressed about 4x with zlib)
        for bookIndex, BBB in enumerate(BOS.getBookList()):
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print('  Decoding {}…'.format(BBB))
            bookAbbrev, numChapters, numVerses, bookStart, bookLength, bookBytes = rawBooks[
                bookIndex]
            textResult = zlib.decompress(bookBytes).decode('utf8')
            if '\t' in textResult:
                logging.warning("Replacing tab characters in {} = {}".format(
                    BBB, bookAbbrev))
                textResult = textResult.replace('\t', ' ')
            if BibleOrgSysGlobals.strictCheckingFlag:
                assert '  ' not in textResult

            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'EasyWorship Bible Book object'
            thisBook.objectTypeString = 'EasyWorship Bible'
            if bookAbbrev: thisBook.addLine('toc3', bookAbbrev)

            C, V = '0', '-1'  # So id line starts at 0:0
            for line in textResult.split('\r\n'):
                if not line: continue  # skip blank lines
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print('Processing {} {} line: {!r}'.format(
                        self.abbreviation, BBB, line))
                assert line[0].isdigit()
                assert ':' in line[:4]
                CV, verseText = line.split(' ', 1)
                newC, newV = CV.split(':')

                if newC != C:  # We've started a new chapter
                    if self.abbreviation == 'hcsb' and BBB in ('SA2', ):
                        # Handle a bad bug -- chapter 24 has verses out of order
                        print(
                            "Skipping error for out-of-order chapters in {}!".
                            format(BBB))
                    else:
                        assert int(newC) > int(C)
                    C, V = newC, '0'
                    thisBook.addLine('c', C)

                if self.abbreviation == 'TB' and BBB == 'JOL':
                    # Handle a bug -- chapter 3 repeats -- so give up on the rest of this book
                    if int(newV) < int(V): break
                elif BBB in knownVerseOrderProblems.get(
                        self.abbreviation, ()):
                    # Handle a bug -- these books have verses out of order
                    print("Skipping error for out-of-order verses in {} {}".
                          format(self.abbreviation, BBB))
                else:
                    try:
                        assert int(newV) > int(V)
                    except ValueError:
                        if BibleOrgSysGlobals.debugFlag:
                            print(
                                "Something's not an integer around {} {}:{} {}"
                                .format(BBB, C, V, verseText))
                V = newV
                thisBook.addLine('v', V + ' ' + verseText)

            if BibleOrgSysGlobals.verbosityLevel > 3: print("Saving", BBB)
            self.stashBook(thisBook)

        self.doPostLoadProcessing()
        return keep
Esempio n. 32
0
    def load(self):
        """
        Load a single source file and load book elements.

        The file is read line by line as tab-separated records whose first
        field names the record type: 'info', 'book_name', 'verse',
        'pericope', 'parallel', 'xref' or 'footnote'.  Verse text is first
        collected per book; afterwards each book is built (headings,
        footnotes and cross-references are merged into the verse text),
        saved with self.saveBook(), and finally
        self.doPostLoadProcessing() is called.

        Side effects: sets self.name from the 'shortName' info record.

        NOTE(review): `halt` is not defined in this block; it is presumably
        a deliberate abort (NameError) on unexpected data -- confirm.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        def decodeVerse(encodedVerseString):
            """
            Decodes the verse which has @ format codes.

            Paragraph-level codes are turned into markers with a double
            backslash and character-level codes into markers with a single
            backslash, so the two kinds can be told apart when the verse is
            split into lines later on.
            """
            verseString = encodedVerseString
            # A leading '@@' simply means that encoding follows.
            # The prefix is stripped at most twice (behaviour kept as found).
            if verseString.startswith('@@'):
                verseString = verseString[2:]
            if verseString.startswith('@@'):
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace('@^', '\\\\p ')
            verseString = verseString.replace('@0', '\\\\m ')
            # '@4' must use the double backslash like its siblings, otherwise
            # it is never recognised as a paragraph marker when splitting.
            verseString = verseString.replace('@1', '\\\\q1 ') \
                .replace('@2', '\\\\q2 ') \
                .replace('@3', '\\\\q3 ') \
                .replace('@4', '\\\\q4 ')
            verseString = verseString.replace('@8', '\\\\m ')
            # Character markers (marked now with single backslash)
            verseString = verseString.replace('@6', '\\wj ') \
                .replace('@5', '\\wj*')
            verseString = verseString.replace('@9', '\\add ') \
                .replace('@7', '\\add*')  # or \\i ???
            # Footnote / cross-reference callers: keep the single-digit index
            verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
            verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
            assert ('@' not in verseString)
            return verseString

        # end of decodeVerse

        # Read all the lines into bookDict
        bookNameDict, bookDict = OrderedDict(), OrderedDict()
        footnoteDict, xrefDict, headingDict = {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = ''
        lastBBB = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines

                bits = line.split('\t')
                recordType = bits[0]
                if recordType == 'info':
                    assert (len(bits) == 3)
                    if bits[1] == 'shortName':
                        self.name = bits[2]
                    elif bits[1] in ('longName', 'description'):
                        pass  # Recognised, but not used by this loader
                    elif bits[1] == 'locale':
                        # Only validated here; the value itself is not stored
                        assert (2 <= len(bits[2]) <= 3)
                    else:
                        # Was referencing the undefined name 'bookCode', which
                        # raised NameError instead of logging the warning.
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, bookNumberString, chapterNumberString, verseNumberString ) )
                elif recordType == 'book_name':
                    assert (3 <= len(bits) <= 4)
                    thisBBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bits[1])
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                elif recordType == 'verse':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    if BBB != lastBBB:  # We have a new book
                        if lastBBB is not None:  # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert (BBB in bookNameDict)
                        bookLines = OrderedDict()  # Keys are (C,V) strings
                    # Just store the decoded text for now
                    bookLines[(chapterNumberString,
                               verseNumberString)] = decodeVerse(
                                   encodedVerseString)
                    lastBBB = BBB
                elif recordType == 'pericope':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    headingString = encodedHeadingString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    assert ('@' not in headingString)
                    # Heading plus an (initially blank) list of parallel refs
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = headingString, []
                elif recordType == 'parallel':
                    # These lines optionally follow pericope lines, so the
                    # B/C/V of the most recently read record is used as key.
                    assert (len(bits) == 2)
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    refList.append(bits[1])  # Mutates the stored list in place
                elif recordType == 'xref':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    noteString = re.sub(
                        r'@<ta(.+?)@>', r'', noteString
                    )  # Get rid of these encoded BCV references for now
                    noteString = re.sub(
                        r'@<to(.+?)@>', r'', noteString
                    )  # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace('@/', '')
                    assert ('@' not in noteString)
                    xrefDict[(BBB, chapterNumberString, verseNumberString,
                              indexNumberString)] = noteString
                elif recordType == 'footnote':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    assert ('@' not in noteString)
                    footnoteDict[(BBB, chapterNumberString, verseNumberString,
                                  indexNumberString)] = noteString
                else:
                    # Was referencing the undefined name 'bookCode', so the
                    # diagnostic below was never actually printed.
                    print("YETBible: Unknown line type", self.givenName, BBB,
                          bookNumberString, chapterNumberString,
                          verseNumberString, len(bits), bits)
                    halt
            # Save the last book (if the file contained any verse lines)
            if lastBBB is not None:
                bookDict[lastBBB] = bookLines

        # Now process the books
        for BBB, bkData in bookDict.items():
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,
                 verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                headingKey = (BBB, chapterNumberString, verseNumberString)
                if headingKey in headingDict:
                    heading, refList = headingDict[headingKey]
                    thisBook.addLine('s', heading)
                    if refList:
                        thisBook.addLine('r', '(' + '; '.join(refList) + ')')
                # Insert footnotes and cross-references
                # (the caller is the single digit following the placeholder)
                while ('\\ff' in verseString):
                    fIx = verseString.index('\\ff')
                    caller = verseString[fIx + 3]
                    assert (caller.isdigit())
                    note = footnoteDict[(BBB, chapterNumberString,
                                         verseNumberString, caller)]
                    verseString = (verseString[:fIx] + '\\f + \\ft ' + note +
                                   '\\f*' + verseString[fIx + 4:])
                while ('\\xx' in verseString):
                    fIx = verseString.index('\\xx')
                    caller = verseString[fIx + 3]
                    assert (caller.isdigit())
                    note = xrefDict[(BBB, chapterNumberString,
                                     verseNumberString, caller)]
                    verseString = (verseString[:fIx] + '\\x - \\xt ' + note +
                                   '\\x*' + verseString[fIx + 4:])
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                if verseString.startswith(
                        '\\\\'):  # It's an initial paragraph marker
                    # Marker names are one or two characters, then a space
                    if verseString[3] == ' ':
                        marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4] == ' ':
                        marker, verseString = verseString[2:4], verseString[5:]
                    else:
                        halt
                    thisBook.addLine(marker, '')
                assert (not verseString.startswith('\\\\'))
                bits = verseString.split(
                    '\\\\'
                )  # Split on paragraph markers (but not character markers)
                for j, bit in enumerate(bits):
                    if j == 0:
                        # Only the text before the first embedded paragraph
                        # marker belongs on the verse line; the remaining
                        # segments are added as their own lines below.
                        thisBook.addLine(
                            'v', verseNumberString + ' ' + bit.rstrip())
                    else:
                        if bit[1] == ' ': marker, bit = bit[0], bit[2:]
                        elif bit[2] == ' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        thisBook.addLine(marker, bit.rstrip())
            self.saveBook(thisBook)
        self.doPostLoadProcessing()
Esempio n. 33
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = metadataName = None
        lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search(
                        '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line)
                    if match: vplType = 1
                    else:
                        match = re.search('^(\\d{8})\\s', line)
                        if match: vplType = 2
                        else:
                            match = re.search('^# language_name:\\s', line)
                            if match: vplType = 3
                            #else:
                            #match = re.search( '^; TITLE:\\s', line )
                            #if match: vplType = 4
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print(
                                "First line got type #{} {!r} match from {!r}".
                                format(vplType, match.group(0), line))
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "VPLBible.load: (unexpected) first line was {!r} in {}"
                                .format(line, self.sourceFilepath))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue
                    #print( 'vplType', vplType )

                #print ( 'VPL file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if vplType == 3:
                    if line.startswith('# language_name:'):
                        string = line[16:].strip()
                        if string and string != 'Not available':
                            settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith('# closest ISO 639-3:'):
                        string = line[20:].strip()
                        if string and string != 'Not available':
                            settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith('# year_short:'):
                        string = line[13:].strip()
                        if string and string != 'Not available':
                            settingsDict['Year.short'] = string
                        continue
                    elif line.startswith('# year_long:'):
                        string = line[12:].strip()
                        if string and string != 'Not available':
                            settingsDict['Year.long'] = string
                        continue
                    elif line.startswith('# title:'):
                        string = line[8:].strip()
                        if string and string != 'Not available':
                            settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith('# URL:'):
                        string = line[6:].strip()
                        if string and string != 'Not available':
                            settingsDict['URL'] = string
                        continue
                    elif line.startswith('# copyright_short:'):
                        string = line[18:].strip()
                        if string and string != 'Not available':
                            settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith('# copyright_long:'):
                        string = line[17:].strip()
                        if string and string != 'Not available':
                            settingsDict['Copyright.long'] = string
                        continue
                    elif line[0] == '#':
                        logging.warning(
                            "VPLBible.load {} is skipping unknown line: {}".
                            format(vplType, line))
                        continue  # Just discard comment lines
                #elif vplType == 4:
                #if line.startswith( '; TITLE:' ):
                #string = line[8:].strip()
                #if string: settingsDict['TITLE'] = string
                #continue
                #elif line.startswith( '; ABBREVIATION:' ):
                #string = line[15:].strip()
                #if string: settingsDict['ABBREVIATION'] = string
                #continue
                #elif line.startswith( '; HAS ITALICS:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_ITALICS'] = string
                #continue
                #elif line.startswith( '; HAS FOOTNOTES:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_FOOTNOTES'] = string
                #continue
                #elif line.startswith( '; HAS FOOTNOTES' ):
                #string = line[14:].strip()
                #if string: settingsDict['HAS_FOOTNOTES'] = string
                #continue
                #elif line.startswith( '; HAS REDLETTER:' ):
                #string = line[15:].strip()
                #if string: settingsDict['HAS_REDLETTER'] = string
                #continue
                #elif line[0]==';':
                #logging.warning( "VPLBible.load{} is skipping unknown header/comment line: {}".format( vplType, line ) )
                #continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split(' ', 2)
                    #print( self.givenName, BBB, bits )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCodeText, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split(
                            ':')
                        #print( "{} {} bc={!r} c={!r} v={!r} txt={!r}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, vText ) )
                        if chapterNumberString == '':
                            chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL
                    else:
                        print("Unexpected number of bits", self.givenName, BBB,
                              bookCodeText, chapterNumberString,
                              verseNumberString, len(bits), bits)

                    if not bookCodeText and not chapterNumberString and not verseNumberString:
                        print("Skipping empty line in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCodeText,
                            chapterNumberString, verseNumberString))
                        continue
                    if BibleOrgSysGlobals.debugFlag:
                        assert 2 <= len(bookCodeText) <= 4
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error(
                            "Invalid verse number field at {}/{} {}:{!r}".
                            format(bookCodeText, BBB, chapterNumberString,
                                   verseNumberString))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int(chapterNumberString)
                    verseNumber = int(verseNumberString)

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        #if bookCodeText in ('Ge',): BBB = 'GEN'
                        if bookCodeText == 'Le' and lastBBB == 'GEN':
                            BBB = 'LEV'
                        elif bookCodeText in ('Jud', ) and lastBBB == 'JOS':
                            BBB = 'JDG'
                            #elif bookCodeText in ('Es',): BBB = 'EST'
                            #elif bookCodeText in ('Pr',): BBB = 'PRO'
                            #elif bookCodeText in ('So','SOL') and lastBBB == 'ECC': BBB = 'SNG'
                            #elif bookCodeText in ('La',) and lastBBB == 'JER': BBB = 'LAM'
                            #elif bookCodeText == 'PHI' and lastBBB == 'EPH': BBB = 'PHP'
                            #elif bookCodeText == 'PHI' and self.givenName == "bjp_vpl": BBB = 'PHP' # Hack for incomplete NT
                            #elif bookCodeText in ('Jude',): BBB = 'JDE'
                            #elif bookCodeText == 'PRA' and lastBBB == 'LJE': BBB = 'PAZ'
                            #elif bookCodeText == 'PRM' and lastBBB == 'GES': BBB = 'MAN'
                        else:
                            BBB = BOS66.getBBBFromText(
                                bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BOS81.getBBBFromText(
                                    bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BOSx.getBBBFromText(
                                    bookCodeText)  # Try to guess
                            if not BBB:
                                BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromText(
                                    bookCodeText)  # Try to guess
                        if not BBB:
                            logging.critical(
                                "VPL Bible: Unable to determine book code from text {!r} after {!r}={}"
                                .format(bookCodeText, lastBookCodeText,
                                        lastBBB))
                            halt

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0] == '«':
                        #print( "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                        if BBB == 'PSA' and verseNumberString == '1':  # Psalm title
                            vBits = vText[1:].split('»')
                            #print( "vBits", vBits )
                            thisBook.addLine('d', vBits[0])  # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    #if verseNumber==lastVerseNumber and vText==lastVText:
                    #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #continue
                    if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                            '&lt;') and self.givenName == 'basic_english':
                        # Move Psalm titles to verse zero
                        verseNumber = 0
                    #if verseNumber < lastVerseNumber:
                    #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #elif verseNumber == lastVerseNumber:
                    #if vText == lastVText:
                    #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #else:
                    #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                elif vplType in (2, 3):
                    bits = line.split('\t', 1)
                    #print( self.givenName, BBB, bits )
                    bookNumberString, chapterNumberString, verseNumberString = bits[
                        0][:2], bits[0][2:5], bits[0][5:]
                    #print( bookNumberString, chapterNumberString, verseNumberString )
                    while len(chapterNumberString
                              ) > 1 and chapterNumberString[0] == '0':
                        chapterNumberString = chapterNumberString[
                            1:]  # Remove leading zeroes
                    while len(verseNumberString
                              ) > 1 and verseNumberString[0] == '0':
                        verseNumberString = verseNumberString[
                            1:]  # Remove leading zeroes
                    bookCodeText, chapterNumber, verseNumber = int(
                        bookNumberString), int(chapterNumberString), int(
                            verseNumberString)
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[') #.replace(' !','!')

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        bnDict = {
                            67: 'TOB',
                            68: 'JDT',
                            69: 'ESG',
                            70: 'WIS',
                            71: 'SIR',
                            72: 'BAR',
                            73: 'LJE',
                            74: 'PAZ',
                            75: 'SUS',
                            76: 'BEL',
                            77: 'MA1',
                            78: 'MA2',
                            79: 'MA3',
                            80: 'MA4',
                            81: 'ES1',
                            82: 'ES2',
                            83: 'MAN',
                            84: 'PS2',
                            85: 'PSS',
                            86: 'ODE',
                        }
                        if 1 <= bookCodeText <= 66:
                            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                                bookCodeText)
                        else:
                            BBB = bnDict[bookCodeText]

                #elif vplType == 4:
                #if line.startswith( '$$ ' ):
                #if metadataName and metadataContents:
                #settingsDict[metadataName] = metadataContents
                #metadataName = None
                #pointer = line[3:]
                ##print( "pointer", repr(pointer) )
                #if pointer and pointer[0]=='{' and pointer[-1]=='}':
                #metadataName = pointer[1:-1]
                #if metadataName:
                ##print( "metadataName", repr(metadataName) )
                #metadataContents = ''
                #else: # let's assume it's a BCV reference
                #pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                #.replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                #.replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                #.replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                #.replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                #.replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                #.replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                #B_CV_Bits = pointer.split( ' ', 1 )
                #if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                #bookCodeText, CVString = B_CV_Bits
                #chapterNumberString, verseNumberString = CVString.split( ':' )
                #chapterNumber = int( chapterNumberString )
                #verseNumber = int( verseNumberString )
                #if bookCodeText != lastBookCodeText: # We've started a new book
                #if bookCodeText in ('Ge',): BBB = 'GEN'
                #elif bookCodeText in ('Le',): BBB = 'LEV'
                #elif bookCodeText in ('La',): BBB = 'LAM'
                #else:
                ##print( "4bookCodeText =", repr(bookCodeText) )
                ##BBB = BOS.getBBBFromText( bookCodeText )  # Try to guess
                #BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                #if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                #if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                ##print( "4BBB =", repr(BBB) )
                #else: print( "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )
                #continue # Just save the pointer information which refers to the text on the next line
                #else: # it's not a $$ line
                #text = line
                ##print( "text", repr(text) )
                #if metadataName:
                #metadataContents += ('\n' if metadataContents else '') + text
                #continue
                #else:
                #vText = text
                ## Handle bits like (<scripref>Pr 2:7</scripref>)
                #vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                #vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                ##if '\\' in vText: print( 'VPL vText', repr(vText) )
                #if vplType == 4: # Forge for SwordSearcher
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                ## Convert {stuff} to footnotes
                #match = re.search( '\\{(.+?)\\}', vText )
                #while match:
                #footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                #vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                #match = re.search( '\\{(.+?)\\}', vText )
                ## Convert [stuff] to added fields
                #match = re.search( '\\[(.+?)\\]', vText )
                #while match:
                #addText = '\\add {}\\add*'.format( match.group(1) )
                #vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                ##print( BBB, chapterNumber, verseNumber, repr(vText) )
                #match = re.search( '\\[(.+?)\\]', vText )
                #for badChar in '{}[]':
                #if badChar in vText:
                #logging.warning( "Found remaining braces or brackets in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                #break

                else:
                    logging.critical('Unknown VPL type {}'.format(vplType))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt

                if bookCodeText:
                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        if lastBookCodeText is not None:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCodeText = bookCodeText
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "VPLBible{} could not figure out {!r} book code"
                                .format(vplType, bookCodeText))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCodeText,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCodeText,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCodeText,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCodeText yet
                    logging.warning(
                        "VPLBible.load{} is skipping unknown pre-book line: {}"
                        .format(vplType, line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #print( "VPL settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Esempio n. 34
0
    def load(self):
        """
        Load a single Unbound Bible source file and build its book objects.

        The file at self.sourceFilepath is read line by line (decoded with
        self.encoding):
          * blank lines are discarded;
          * lines starting with '#' are comment lines -- when such a line splits
            into exactly two tab-separated fields and the second one is
            non-empty, the pair is stored in self.suppliedMetadata["Unbound"];
          * every other line is split on tabs and is expected to have 4, 6 or 9
            fields, the verse text always being the last one.

        A new BibleBook is started whenever the book code changes, a 'c' line
        is added whenever the chapter number changes, and every verse that has
        text is added as a 'v' line.  Finished books are handed over to
        self.stashBook().  A file that contains no verse lines at all simply
        stashes no book.

        Afterwards the collected metadata is applied and
        self.doPostLoadProcessing() is run.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata["Unbound"] = {}

        BBB = None
        thisBook = None  # Stays None until the first verse line starts a book
        # Pre-bind the reference fields so that the diagnostics below can always
        #   be formatted, even when the very first data line is malformed
        bookCode = chapterNumberString = verseNumberString = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ""
        with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
            for line in myFile:
                if line[-1] == "\n":
                    line = line[:-1]  # Removing trailing newline character
                if not line:
                    continue  # Just discard blank lines

                if line[0] == "#":
                    hashBits = line[1:].split("\t")
                    if len(hashBits) == 2 and hashBits[1]:  # We have some valid meta-data
                        self.suppliedMetadata["Unbound"][hashBits[0]] = hashBits[1]
                    continue  # Just discard comment lines

                # The number of tab-separated fields tells us which layout we have
                bits = line.split("\t")
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = (
                        bits
                    )
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = (
                        bits
                    )
                elif len(bits) == 1 and self.givenName.startswith("lxx_a_parsing_"):
                    # The reference shown here is the one from the previous good line (if any)
                    logging.warning(
                        _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                else:
                    print(
                        "Unexpected number of bits",
                        self.givenName,
                        BBB,
                        bookCode,
                        chapterNumberString,
                        verseNumberString,
                        len(bits),
                        bits,
                    )
                    # NOTE(review): 'halt' is not bound anywhere in this method -- presumably
                    #   it is left undefined on purpose so that this line aborts the load
                    #   (with a NameError); confirm against the rest of the module
                    halt

                if NRSVA_bookCode:
                    assert len(NRSVA_bookCode) == 3
                if NRSVA_chapterNumberString:
                    assert NRSVA_chapterNumberString.isdigit()
                if NRSVA_verseNumberString:
                    assert NRSVA_verseNumberString.isdigit()

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print(
                        "Skipping empty line in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                if BibleOrgSysGlobals.debugFlag:
                    assert len(bookCode) == 3
                    assert chapterNumberString.isdigit()
                    assert verseNumberString.isdigit()

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString {!r} in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText:
                    continue  # Just ignore blank verses I think
                if vText == "+":
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumberString.isdigit()
                    sequenceNumber = int(sequenceNumberString)
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumber > lastSequence or self.givenName in (
                            "gothic_latin",
                            "hebrew_bhs_consonants",
                            "hebrew_bhs_vowels",
                            "latvian_nt",
                            "ukrainian_1871",
                        )  # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.stashBook(thisBook)
                    BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromUnboundBibleCode(bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "Unbound Bible Book object"
                    thisBook.objectTypeString = "Unbound"
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumber > lastChapterNumber or BBB == "ESG"  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                    thisBook.addLine("c", chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    # An exact repeat of the previous verse line is the only case that is dropped
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").format(
                            self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                        )
                    )
                    continue
                if (
                    BBB == "PSA"
                    and verseNumberString == "1"
                    and vText.startswith("&lt;")
                    and self.givenName == "basic_english"
                ):
                    # Move Psalm titles to verse zero
                    #   (only the number used for the sequence checks changes --
                    #    the 'v' line below is still written with verseNumberString)
                    verseNumber = 0
                # The two checks below only log -- the verse line is still added afterwards
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format(
                            lastVerseNumber,
                            verseNumber,
                            self.givenName,
                            BBB,
                            bookCode,
                            chapterNumberString,
                            verseNumberString,
                        )
                    )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}").format(
                                verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString
                            )
                        )
                thisBook.addLine("v", verseNumberString + " " + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book (there is none if the file had no verse lines)
        if thisBook is not None:
            self.stashBook(thisBook)
        self.applySuppliedMetadata("Unbound")  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Esempio n. 35
0
    def load(self):
        """
        Load all the books out of the SQLite3 database.

        The OT/NT flags in self.suppliedMetadata['MySword'] decide which book
        to start with and how many books to attempt.  The books are then
        walked verse by verse, following the chapter/verse counts reported by
        self.BibleOrganisationalSystem, and one row is queried from the Bible
        table for each reference.  Every reference -- including missing,
        blank or undecodable ones, which are logged -- is passed on to
        handleRTFLine().  A book is only stashed if at least one non-blank
        text line was found for it.

        Loading stops once the expected number of books has been attempted, or
        straight away if a non-text row is found in a module whose metadata
        contains an 'encryption' entry.  Finally the cursor is closed and
        removed, and the post-load processing is run.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("load()"))
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        # Work out where to start and how many books to attempt
        moduleInfo = self.suppliedMetadata['MySword']
        hasOT, hasNT = moduleInfo['OT'], moduleInfo['NT']
        if hasOT:
            BBB = 'GEN'
            booksExpected = 66 if hasNT else 39
        elif hasNT:
            BBB, booksExpected = 'MAT', 27

        def beginBook(bookCode):
            """
            Create an empty book object for bookCode and look up the numbers
                needed to step through it.

            Returns the new book, its list of verse counts, the number of
                chapters, the verse count of the first chapter, and the
                book reference number used in the database query.
            """
            newBook = BibleBook(self, bookCode)
            newBook.objectNameString = 'MySword Bible Book object'
            newBook.objectTypeString = 'MySword'
            versesPerChapter = self.BibleOrganisationalSystem.getNumVersesList(bookCode)
            chapterCount, firstChapterVerses = len(versesPerChapter), versesPerChapter[0]
            bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(bookCode)
            return newBook, versesPerChapter, chapterCount, firstChapterVerses, bookNumber

        # Create the first book
        thisBook, verseList, numC, numV, nBBB = beginBook(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {'haveParagraph': False}
        haveLines = False
        verseQuery = 'select Scripture from Bible where Book=? and Chapter=? and Verse=?'
        while True:
            self.cursor.execute(verseQuery, (nBBB, C, V))
            try:
                line = self.cursor.fetchone()[0]
            except TypeError:  # This reference is missing (no row came back)
                line = None

            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            elif not isinstance(line, str):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                        .format(BBB, C, V, line))
                    break  # Give up on the whole module
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                    .format(BBB, C, V, line,
                            self.suppliedMetadata['MySword']))
            elif not line:
                logging.warning(
                    "MySwordBible.load: Found blank verse line at {} {}:{}"
                    .format(BBB, C, V))
            else:
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (Trailing ones are dropped, inner ones become spaces)
                line = line.rstrip('\r\n')
                if '\r' in line or '\n' in line:  # (in the middle)
                    logging.warning(
                        "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                        .format(BBB, C, V))
                line = line.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

            # Note that missing/blank/undecodable lines get passed on as well
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

            # Step to the next verse, chapter, or book
            V += 1
            if V > numV:
                C += 1
                if C <= numC:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("  MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected:
                        break
                    BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                    # Create the next book
                    thisBook, verseList, numC, numV, nBBB = beginBook(BBB)
                    haveLines = False
                    C = V = 1

            # A pending paragraph flag goes into whichever book is now current
            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        del self.cursor
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()