예제 #1
0
    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML book..."))

        # Process the div attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib, value in book.items():
            if attrib == "bnumber":
                bookNumber = value
            elif attrib == "bname":
                bookName = value
            elif attrib == "bsname":
                bookShortName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)

        if BBB:
            if Globals.verbosityLevel > 2:
                print(_("Validating {} {}...").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "Haggai XML Bible Book object"
            thisBook.objectTypeString = "Haggai"
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == HaggaiXMLBible.captionTag:
                    sublocation = "caption in {}".format(BBB)
                    Globals.checkXMLNoAttributes(element, sublocation, 'jhl6')
                    Globals.checkXMLNoSubelements(element, sublocation, 'jk21')
                    Globals.checkXMLNoTail(element, sublocation, 'kjh6')
                    thisBook.appendLine('mt', element.text)
                elif element.tag == HaggaiXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    Globals.checkXMLNoText(element, sublocation, 'j3jd')
                    Globals.checkXMLNoTail(element, sublocation, 'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error("Expected to find '{}' but got '{}'".format(
                        HaggaiXMLBible.chapterTag, element.tag))
            if Globals.verbosityLevel > 2:
                print("  Saving {} into results...".format(BBB))
            self.saveBook(thisBook)
예제 #2
0
    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3: print( _("Validating XML book...") )

        # Process the div attributes first
        BBB = bookName = bookShortName = bookNumber = None
        for attrib,value in book.items():
            if attrib=="bnumber":
                bookNumber = value
            elif attrib=="bname":
                bookName = value
            elif attrib=="bsname":
                bookShortName = value
            else: logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value ) )
        if bookNumber:
            try: BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB( bookName )

        if BBB:
            if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = "Haggai XML Bible Book object"
            thisBook.objectTypeString = "Haggai"
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == HaggaiXMLBible.captionTag:
                    sublocation = "caption in {}".format( BBB )
                    Globals.checkXMLNoAttributes( element, sublocation, 'jhl6' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'jk21' )
                    Globals.checkXMLNoTail( element, sublocation, 'kjh6' )
                    thisBook.appendLine( 'mt', element.text )
                elif element.tag == HaggaiXMLBible.chapterTag:
                    sublocation = "chapter in {}".format( BBB )
                    Globals.checkXMLNoText( element, sublocation, 'j3jd' )
                    Globals.checkXMLNoTail( element, sublocation, 'al1d' )
                    self.__validateAndExtractChapter( BBB, thisBook, element )
                else: logging.error( "Expected to find '{}' but got '{}'".format( HaggaiXMLBible.chapterTag, element.tag ) )
            if Globals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
            self.saveBook( thisBook )
예제 #3
0
    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3:
            print(_("Validating OpenSong XML book..."))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBB(bookName)
            if BBB:
                if Globals.verbosityLevel > 2:
                    print(_("Validating {} {}...").format(BBB, bookName))
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = Globals.BibleBooksCodes.getUSFMAbbreviation(
                    BBB)
                thisBook.appendLine(
                    'id', '{} imported by {}'.format(USFMAbbreviation.upper(),
                                                     ProgNameVersion))
                thisBook.appendLine('h', bookName)
                thisBook.appendLine('mt1', bookName)
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        Globals.checkXMLNoText(element, sublocation, 'j3jd')
                        Globals.checkXMLNoTail(element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find '{}' but got '{}'".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                if Globals.verbosityLevel > 2:
                    print("  Saving {} into results...".format(BBB))
                self.saveBook(thisBook)
            else:
                logging.error(
                    _("OpenSong load doesn't recognize book name: '{}'").
                    format(bookName))  # no BBB
        else:
            logging.error(
                _("OpenSong load can't find a book name"))  # no bookName
예제 #4
0
    def __validateAndExtractBook( self, book ):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        if Globals.verbosityLevel > 3: print( _("Validating OpenSong XML book...") )

        # Process the div attributes first
        BBB = bookName = None
        for attrib,value in book.items():
            if attrib=="n":
                bookName = value
            else: logging.warning( "Unprocessed '{}' attribute ({}) in book element".format( attrib, value ) )
        if bookName:
            BBB = self.genericBOS.getBBB( bookName )
            if BBB:
                if Globals.verbosityLevel > 2: print( _("Validating {} {}...").format( BBB, bookName ) )
                thisBook = BibleBook( self, BBB )
                thisBook.objectNameString = "OpenSong XML Bible Book object"
                thisBook.objectTypeString = "OpenSong"
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = Globals.BibleBooksCodes.getUSFMAbbreviation( BBB )
                thisBook.appendLine( 'id', '{} imported by {}'.format( USFMAbbreviation.upper(), ProgNameVersion ) )
                thisBook.appendLine( 'h', bookName )
                thisBook.appendLine( 'mt1', bookName )
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format( BBB )
                        Globals.checkXMLNoText( element, sublocation, 'j3jd' )
                        Globals.checkXMLNoTail( element, sublocation, 'al1d' )
                        self.__validateAndExtractChapter( BBB, thisBook, element )
                    else: logging.error( "Expected to find '{}' but got '{}'".format( OpenSongXMLBible.chapterTag, element.tag ) )
                if Globals.verbosityLevel > 2: print( "  Saving {} into results...".format( BBB ) )
                self.saveBook( thisBook )
            else: logging.error( _("OpenSong load doesn't recognize book name: '{}'").format( bookName ) ) # no BBB
        else: logging.error( _("OpenSong load can't find a book name") ) # no bookName
예제 #5
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        loadErrors = []

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


        # Just get some information from the file
        cursor.execute( 'select * from Bible' )
        rows = cursor.fetchall()
        numRows = len(rows)
        if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
        BBBn1 = rows[0][0]
        if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
        del rows
        BBB1 = None
        if BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )


        testament = BBB = None
        booksExpected = textLineCountExpected = 0
        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957
        elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['Apocrypha']: # incomplete
            testament, BBB = 'AP', 'XXX'
            booksExpected, textLineCountExpected = 99, 999999
            halt
        if not BBB:
            logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            if 0:
                cursor.execute( 'select * from Bible' )
                rows = cursor.fetchall()
                print( "rows", len(rows) )
                for row in rows:
                    assert( len(row) == 4 )
                    BBBn, C, V, text = row # First three are integers, the last is a string
                    print( BBBn, C, V, repr(text) )
                    if C==2: break
                del rows # Takes a lot of memory
        if Globals.debugFlag or Globals.verbosityLevel>2:
            print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
        if BBB1 != BBB:
            logging.critical( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            loadErrors.append( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
            if not BBB: BBB = BBB1
        if numRows != textLineCountExpected:
            logging.critical( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
            loadErrors.append( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
        #halt

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "e-Sword Bible Book object"
        thisBook.objectTypeString = "e-Sword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except: # This reference is missing
                #print( "something wrong at", BBB, C, V )
                #if Globals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'e-Sw file line is "' + line + '"' )
            if line is None: logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                        break
                elif not line: logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        if Globals.debugFlag:
                            logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                        #print( repr(line) )
                    while line and line[-1] in '\r\n': line = line[:-1] # Remove CR/LFs from the end
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

            #print( "e-Sword.load", BBB, C, V, repr(line) )
            self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self.name, BBB )
                    thisBook.objectNameString = "e-Sword Bible Book object"
                    thisBook.objectTypeString = "e-Sword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.appendLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.appendLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.appendLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        if Globals.strictCheckingFlag or Globals.debugFlag: self.checkForExtraMaterial( cursor, BOS )
        cursor.close()
        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        self.doPostLoadProcessing()
예제 #6
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        def decodeVerse(encodedVerseString):
            """
            Decodes the verse which has @ format codes.
            """
            verseString = encodedVerseString
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace('@^', '\\\\p ')
            verseString = verseString.replace('@0', '\\\\m ')
            verseString = verseString.replace('@1', '\\\\q1 ').replace(
                '@2', '\\\\q2 ').replace('@3',
                                         '\\\\q3 ').replace('@4', '\\q4 ')
            verseString = verseString.replace('@8', '\\\\m ')
            # Character markers (marked now with single backslash)
            verseString = verseString.replace('@6',
                                              '\\wj ').replace('@5', '\\wj*')
            verseString = verseString.replace('@9', '\\add ').replace(
                '@7', '\\add*')  # or \\i ???
            verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
            verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
            #print( repr( verseString ) )
            assert ('@' not in verseString)
            return verseString

        # end of decodeVerse

        # Read all the lines into bookDict
        lastLine, lineCount = '', 0
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(
        ), OrderedDict(), {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = ''
        lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      YETBible.load: Detected UTF-16 Byte Order Marker" )
                #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #print ( 'YETBible file line is "' + line + '"' )

                bits = line.split('\t')
                #print( self.givenName, BBB, bits )
                if bits[0] == 'info':
                    assert (len(bits) == 3)
                    if bits[1] == 'shortName':
                        shortName = bits[2]
                        self.name = shortName
                    elif bits[1] == 'longName':
                        longName = bits[2]
                    elif bits[1] == 'description':
                        description = bits[2]
                    elif bits[1] == 'locale':
                        locale = bits[2]
                        assert (2 <= len(locale) <= 3)
                        if locale == 'in':
                            locale = 'id'  # Fix a quirk in the locale encoding
                    else:
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                elif bits[0] == 'book_name':
                    assert (3 <= len(bits) <= 4)
                    thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bits[1])
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                    continue
                elif bits[0] == 'verse':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[
                        1:]
                    if Globals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    #print( "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) )
                    if BBB != lastBBB:  # We have a new book
                        if lastBBB is not None:  # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert (BBB in bookNameDict)
                        bookLines = OrderedDict()  # Keys are (C,V) strings
                    verseString = decodeVerse(encodedVerseString)
                    bookLines[(chapterNumberString, verseNumberString
                               )] = verseString  # Just store it for now
                    lastBBB = BBB
                    continue
                elif bits[0] == 'pericope':
                    assert (len(bits) == 5)
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[
                        1:]
                    if Globals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    headingString = encodedHeadingString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    #print( repr(encodedHeadingString), repr(headingString) )
                    assert ('@' not in headingString)
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = headingString, [
                                 ]  # Blank refList
                    continue
                elif bits[
                        0] == 'parallel':  # These lines optionally follow pericope lines
                    assert (len(bits) == 2)
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    refList.append(bits[1])
                    #print( "parallel2", repr(heading), refList )
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = heading, refList
                    continue
                elif bits[0] == 'xref':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if Globals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    noteString = re.sub(
                        r'@<ta(.+?)@>', r'', noteString
                    )  # Get rid of these encoded BCV references for now
                    noteString = re.sub(
                        r'@<to(.+?)@>', r'', noteString
                    )  # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace('@/', '')
                    #print( repr(encodedNoteString), repr(noteString) )
                    assert ('@' not in noteString)
                    xrefDict[(BBB, chapterNumberString, verseNumberString,
                              indexNumberString)] = noteString
                    continue
                elif bits[0] == 'footnote':
                    assert (len(bits) == 6)
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if Globals.debugFlag:
                        assert (bookNumberString.isdigit())
                        assert (chapterNumberString.isdigit())
                        assert (verseNumberString.isdigit())
                        assert (indexNumberString.isdigit())
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    assert ('@' not in noteString)
                    footnoteDict[(BBB, chapterNumberString, verseNumberString,
                                  indexNumberString)] = noteString
                    continue
                else:
                    print("YETBible: Unknown line type", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    halt
            bookDict[lastBBB] = bookLines  # Save the last book

            #if bookCode != lastBookCode: # We've started a new book
            #if lastBookCode != -1: # Better save the last book
            #self.saveBook( thisBook )
            #BBB = Globals.BibleBooksCodes.getBBBFromYETBibleCode( bookCode )
            #thisBook = BibleBook( self, BBB )
            #thisBook.objectNameString = "YET Bible Book object"
            #thisBook.objectTypeString = "YET"
            #lastBookCode = bookCode
            #lastChapterNumber = lastVerseNumber = -1

            #if chapterNumber != lastChapterNumber: # We've started a new chapter
            #if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
            #if chapterNumber == 0:
            #logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #thisBook.appendLine( 'c', chapterNumberString )
            #lastChapterNumber = chapterNumber
            #lastVerseNumber = -1

            ## Handle the verse info
            #if verseNumber==lastVerseNumber and vText==lastVText:
            #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #continue
            #if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
            ## Move Psalm titles to verse zero
            #verseNumber = 0
            #if verseNumber < lastVerseNumber:
            #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #elif verseNumber == lastVerseNumber:
            #if vText == lastVText:
            #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #else:
            #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
            #thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
            #lastVText = vText
            #lastVerseNumber = verseNumber

        # Now process the books
        for BBB, bkData in bookDict.items():
            #print( "Processing", BBB )
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,
                 verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                if (BBB, chapterNumberString,
                        verseNumberString) in headingDict:
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                    thisBook.appendLine('s', heading)
                    if refList:
                        refString = ""
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                        for ref in refList:
                            refString += ('; ' if refString else '') + ref
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) )
                        thisBook.appendLine('r', '(' + refString + ')')
                # Insert footnotes and cross-references
                while ('\\ff' in verseString):
                    #print( "footnote", repr(verseString) )
                    fIx = verseString.index('\\ff')
                    caller = verseString[fIx + 3]
                    #print( "fcaller", repr(caller) )
                    assert (caller.isdigit())
                    note = footnoteDict[(BBB, chapterNumberString,
                                         verseNumberString, caller)]
                    #print( "fnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[
                                                  fIx + 4:]
                    #print( "fvS", repr(verseString) )
                while ('\\xx' in verseString):
                    #print( "xref", repr(verseString) )
                    fIx = verseString.index('\\xx')
                    caller = verseString[fIx + 3]
                    #print( "xcaller", repr(caller) )
                    assert (caller.isdigit())
                    note = xrefDict[(BBB, chapterNumberString,
                                     verseNumberString, caller)]
                    #print( "xnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[
                                                  fIx + 4:]
                    #print( "xvS", repr(verseString) )
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.appendLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                #print( BBB, chapterNumberString, verseNumberString, repr(verseString) )
                if verseString.startswith(
                        '\\\\'):  # It's an initial paragraph marker
                    if verseString[3] == ' ':
                        marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4] == ' ':
                        marker, verseString = verseString[2:4], verseString[5:]
                    else:
                        halt
                    #print( '', '\\'+marker )
                    thisBook.appendLine(marker, '')
                assert (not verseString.startswith('\\\\'))
                bits = verseString.split(
                    '\\\\'
                )  # Split on paragraph markers (but not character markers)
                for j, bit in enumerate(bits):
                    #print( "loop", j, repr(bit), repr(verseString) )
                    if j == 0:
                        thisBook.appendLine(
                            'v',
                            verseNumberString + ' ' + verseString.rstrip())
                    else:
                        if bit[1] == ' ': marker, bit = bit[0], bit[2:]
                        elif bit[2] == ' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        #print( "mV", marker, repr(bit), repr(verseString) )
                        thisBook.appendLine(marker, bit.rstrip())
            self.saveBook(thisBook)
        self.doPostLoadProcessing()
예제 #7
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lastLine, lineCount = '', 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      UnboundBible.load: Detected UTF-16 Byte Order Marker" )
                #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #print ( 'UB file line is "' + line + '"' )
                if line[0] == '#':
                    hashBits = line[1:].split('\t')
                    if len(hashBits) == 2 and hashBits[
                            1]:  # We have some valid meta-data
                        if hashBits[0] == 'name': self.name = hashBits[1]
                        elif hashBits[0] == 'filetype':
                            self.filetype = hashBits[1]
                        elif hashBits[0] == 'copyright':
                            self.copyright = hashBits[1]
                        elif hashBits[0] == 'abbreviation':
                            self.abbreviation = hashBits[1]
                        elif hashBits[0] == 'language':
                            self.language = hashBits[1]
                        elif hashBits[0] == 'note':
                            self.note = hashBits[1]
                        elif hashBits[0] == 'columns':
                            self.columns = hashBits[1]
                        # Should some of these be placed into self.settingsDict???
                        logging.warning(
                            "Unknown UnboundBible meta-data field '{}' = '{}'".
                            format(hashBits[0], hashBits[1]))
                    continue  # Just discard comment lines

                bits = line.split('\t')
                #print( self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith(
                        'lxx_a_parsing_'):
                    logging.warning(
                        _("Skipping bad '{}' line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                    continue
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bookCode, chapterNumberString, verseNumberString,
                          len(bits), bits)
                    halt

                if NRSVA_bookCode: assert (len(NRSVA_bookCode) == 3)
                if NRSVA_chapterNumberString:
                    assert (NRSVA_chapterNumberString.isdigit())
                if NRSVA_verseNumberString:
                    assert (NRSVA_verseNumberString.isdigit())

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print("Skipping empty line in {} {} {} {}:{}".format(
                        self.givenName, BBB, bookCode, chapterNumberString,
                        verseNumberString))
                    continue
                if Globals.debugFlag: assert (len(bookCode) == 3)
                if Globals.debugFlag: assert (chapterNumberString.isdigit())
                if Globals.debugFlag: assert (verseNumberString.isdigit())

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString '{}' in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode,
                            chapterNumberString, verseNumberString))

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText: continue  # Just ignore blank verses I think
                if vText == '+':
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if Globals.debugFlag:
                        assert (sequenceNumberString.isdigit())
                    sequenceNumber = int(sequenceNumberString)
                    if Globals.debugFlag:                        assert( sequenceNumber > lastSequence or \
      self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) ) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.saveBook(thisBook)
                    BBB = Globals.BibleBooksCodes.getBBBFromUnboundBibleCode(
                        bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "Unbound Bible Book object"
                    thisBook.objectTypeString = "Unbound"
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if Globals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.appendLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.appendLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook(thisBook)
        self.doPostLoadProcessing()
예제 #8
0
class GreekNT(Bible):
    """
    Class for handling a Greek NT object (which may contain one or more Bible books)

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__(self, sourceFilepath, givenName=None, encoding='utf-8'):
        """
        Constructor: expects the filepath of the source folder.
        Loads (and crudely validates the file(s)) into ???.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = "Greek NT Bible object"
        self.objectTypeString = "GreekNT"

        # Now we can set our object variables
        self.sourceFilepath, self.givenName, self.encoding = sourceFilepath, givenName, encoding

        self.title = self.version = self.date = None
        self.tree = self.header = self.frontMatter = self.divs = self.divTypesString = None
        #self.bkData, self.USFMBooks = OrderedDict(), OrderedDict()
        self.lang = self.language = None

        # Do a preliminary check on the readability of our files
        self.possibleFilenames = []
        if os.path.isdir(
                self.sourceFilepath
        ):  # We've been given a folder -- see if we can find the files
            # There's no standard for OSIS xml file naming
            fileList = os.listdir(self.sourceFilepath)
            #print( len(fileList), fileList )
            # First try looking for OSIS book names
            for filename in fileList:
                if filename.lower().endswith('.txt'):
                    thisFilepath = os.path.join(self.sourceFilepath, filename)
                    #if Globals.debugFlag: print( "Trying {}...".format( thisFilepath ) )
                    if os.access(thisFilepath,
                                 os.R_OK):  # we can read that file
                        self.possibleFilenames.append(filename)
        elif not os.access(self.sourceFilepath, os.R_OK):
            logging.critical("GreekNT: File '{}' is unreadable".format(
                self.sourceFilepath))
            return  # No use continuing
        #print( self.possibleFilenames ); halt

        self.name = self.givenName
        #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML
        #gNTfc.loadMorphGNT()
        #self.books = gNTfc.bookData

    # end of __init__

    #def x__str__( self ):
    #"""
    #This method returns the string representation of a Bible book code.

    #@return: the name of a Bible object formatted as a string
    #@rtype: string
    #"""
    #result = "Greek Bible converter object"
    ##if self.title: result += ('\n' if result else '') + self.title
    ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version )
    ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date )
    #if len(self.books)==1:
    #for BBB in self.books: break # Just get the first one
    #result += ('\n' if result else '') + "  " + _("Contains one book: {}").format( BBB )
    #else: result += ('\n' if result else '') + "  " + _("Number of books = {}").format( len(self.books) )
    #return result
    ## end of __str__

    def load(self):
        if Globals.verbosityLevel > 2:
            print("Loading Greek NT from {}...".format(self.sourceFilepath))
        for BBB in Greek.morphgntBooks:
            self.loadBook(BBB, Greek.morphgntFilenames[BBB])
        if Globals.verbosityLevel > 3:
            print("{} books loaded.".format(len(self.books)))
        #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book
        #for filename in self.possibleFilenames:
        #pathname = os.path.join( self.sourceFilepath, filename )
        #self.loadBook( pathname )
        #else: # most often we have all the Bible books in one file
        #self.loadFile( self.sourceFilepath )
        self.doPostLoadProcessing()

    # end of load

    def loadBook(self, BBB, filename, encoding='utf-8'):
        def unpackLine(line):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert (len(bits) == 7)
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0] == '0': bn = bn[1:]  # Remove any leading zero
            if cn[0] == '0': cn = cn[1:]  # Remove any leading zero
            if vn[0] == '0': vn = vn[1:]  # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert (len(POSCode) == 2)
            assert (POSCode in Greek.POSCodes.keys())

            parsingCode = bits[2]
            assert (len(parsingCode) == 8)
            #print( parsingCode )
            for j, char in enumerate(parsingCode):
                assert (char in Greek.parsingCodes[j])
            assert (parsingCode[0] in Greek.personCodes)
            assert (parsingCode[1] in Greek.tenseCodes)
            assert (parsingCode[2] in Greek.voiceCodes)
            assert (parsingCode[3] in Greek.modeCodes)
            assert (parsingCode[4] in Greek.caseCodes)
            assert (parsingCode[5] in Greek.numberCodes)
            assert (parsingCode[6] in Greek.genderCodes)
            assert (parsingCode[7] in Greek.degreeCodes)

            return (
                bn,
                cn,
                vn,
            ), (
                POSCode,
                parsingCode,
            ), (
                bits[3],
                bits[4],
                bits[5],
                bits[6],
            )

        # end of unpackLine

        self.thisBook = BibleBook(self.name, BBB)
        self.thisBook.objectNameString = "Morph Greek NT Bible Book object"
        self.thisBook.objectTypeString = "MorphGNT"
        filepath = os.path.join(self.sourceFilepath, filename)
        if Globals.verbosityLevel > 2:
            print("  Loading {}...".format(filename))
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open(filepath, encoding=encoding
                  ) as myFile:  # Automatically closes the file when done
            if 1:  #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount == 1 and encoding.lower(
                    ) == 'utf-8' and line and line[0] == chr(65279):  #U+FEFF
                        logging.info(
                            "GreekNT: Detected UTF-16 Byte Order Marker in {}".
                            format(filename))
                        line = line[1:]  # Remove the UTF-8 Byte Order Marker
                    if line[-1] == '\n':
                        line = line[:-1]  # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine(line)
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.appendLine('c', cn)
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.appendLine('v', vn)
                        lastV = vn
                    self.thisBook.appendLine(
                        'vw', "{}/{}/{}/{}".format(word1, word2, word3, word4))
                    self.thisBook.appendLine(
                        'g', "{}/{}".format(POSCode, parsingCode))
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            if 0:  #except:
                logging.critical("Invalid line in " + filepath +
                                 " -- line ignored at " + str(lineCount))
                if lineCount > 1: print('Previous line was: ', lastLine)
                else: print('Possible encoding error -- expected', encoding)
        if self.thisBook:
            if Globals.verbosityLevel > 3:
                print("    {} words loaded from {}".format(
                    len(self.thisBook), filename))
            self.saveBook(self.thisBook)
            #self.books[BBB] = self.thisBook

    # end of loadBook

    def xanalyzeWords(self):
        """ Go through the NT data and do some filing and sorting of the Greek words. """
        if Globals.verbosityLevel > 3:
            print("analyzeWords: have {} books in the loaded NT".format(
                len(self.books)))
        self.wordCounts = {}  # Wordcount organized by BBB
        self.wordCounts['Total'] = 0
        self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {}
        for BBB in self.books:
            wordCount = len(self.books[BBB])
            self.wordCounts[BBB] = wordCount
            self.wordCounts['Total'] += wordCount
            if Globals.verbosityLevel > 3:
                print("  analyzeWords: {} has {} Greek words".format(
                    BBB, wordCount))
            for reference, parsing, (
                    punctuatedWord, actualWord, normalizedWord, lemma
            ) in self.books[BBB]:  # Stuff is: reference,parsing,words
                # File the actual words
                if actualWord not in self.actualWordsToNormalized:
                    self.actualWordsToNormalized[actualWord] = [(
                        [reference],
                        normalizedWord,
                    )]
                    #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] )
                else:  # we've already had this word before
                    previous = self.actualWordsToNormalized[actualWord]
                    #print( "had", actualWord, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList, oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert (not found)
                            if reference not in oldRefList:
                                oldRefList.append(reference)
                                newList.append((
                                    oldRefList,
                                    oldnormalizedWord,
                                ))
                                changed = True
                            found = True
                        else:
                            newList.append((
                                oldRefList,
                                oldnormalizedWord,
                            ))
                    if not found:
                        #print( "  Found a new", normalizedWord, "normalized word for", actualWord, "was", previous )
                        newList.append((
                            [reference],
                            normalizedWord,
                        ))
                        changed = True
                    if changed:
                        self.actualWordsToNormalized[actualWord] = newList
                        #print( "  now have", newList )
                # File the normalized words
                if normalizedWord not in self.normalizedWordsToActual:
                    self.normalizedWordsToActual[normalizedWord] = [(
                        [reference],
                        actualWord,
                    )]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] )
                else:  # we've already had this word before
                    previous = self.normalizedWordsToActual[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord )
                    found = changed = False
                    newList = []
                    for oldRefList, oldActualWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldActualWord )
                        if actualWord == oldActualWord:
                            assert (not found)
                            if reference not in oldRefList:
                                oldRefList.append(reference)
                                newList.append((
                                    oldRefList,
                                    oldActualWord,
                                ))
                                changed = True
                            found = True
                        else:
                            newList.append((
                                oldRefList,
                                oldActualWord,
                            ))
                    if not found:
                        newList.append((
                            [reference],
                            actualWord,
                        ))
                        changed = True
                    if changed:
                        self.normalizedWordsToActual[normalizedWord] = newList
                        #print( "  now have", newList )
                if normalizedWord not in self.normalizedWordsToParsing:
                    self.normalizedWordsToParsing[normalizedWord] = [(
                        [reference],
                        parsing,
                    )]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] )
                else:  # we've already had this word before
                    previous = self.normalizedWordsToParsing[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing )
                    found = changed = False
                    newList = []
                    for oldRefList, oldParsing in previous:
                        #print( "  oRL", oldRefList, "oP", oldParsing )
                        if parsing == oldParsing:
                            assert (not found)
                            if reference not in oldRefList:
                                oldRefList.append(reference)
                                newList.append((
                                    oldRefList,
                                    oldParsing,
                                ))
                                changed = True
                            found = True
                        else:
                            newList.append((
                                oldRefList,
                                oldParsing,
                            ))
                    if not found:
                        newList.append((
                            [reference],
                            parsing,
                        ))
                        changed = True
                    if changed:
                        self.normalizedWordsToParsing[normalizedWord] = newList
                        #print( "  now have", newList )
                # File the self.lemmasToNormalizedWords
                if lemma not in self.lemmasToNormalizedWords:
                    self.lemmasToNormalizedWords[lemma] = [(
                        [reference],
                        normalizedWord,
                    )]
                    #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] )
                else:  # we've already had this word before
                    previous = self.lemmasToNormalizedWords[lemma]
                    #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList, oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert (not found)
                            if reference not in oldRefList:
                                oldRefList.append(reference)
                                newList.append((
                                    oldRefList,
                                    oldnormalizedWord,
                                ))
                                changed = True
                            found = True
                        else:
                            newList.append((
                                oldRefList,
                                oldnormalizedWord,
                            ))
                    if not found:
                        newList.append((
                            [reference],
                            normalizedWord,
                        ))
                        changed = True
                    if changed:
                        self.lemmasToNormalizedWords[lemma] = newList
                        #print( "  now have", newList )
        if Globals.verbosityLevel > 2:
            print("analyzeWords: NT has {} Greek words".format(
                self.wordCounts['Total']))
        if Globals.verbosityLevel > 2:
            print("analyzeWords: NT has {} actual Greek words".format(
                len(self.actualWordsToNormalized)))
        if Globals.verbosityLevel > 3:
            for j, aW in enumerate(self.actualWordsToNormalized.keys()):
                print("  ", aW, self.actualWordsToNormalized[aW])
                if j == 6: break
        if Globals.verbosityLevel > 2:
            print("analyzeWords: NT has {} normalized Greek words".format(
                len(self.normalizedWordsToActual)))
        if Globals.verbosityLevel > 3:
            for j, nW in enumerate(self.normalizedWordsToActual.keys()):
                print("  ", nW, self.normalizedWordsToActual[nW])
                if j == 6: break
        if Globals.verbosityLevel > 2:
            print("analyzeWords: NT has {} normalized Greek words".format(
                len(self.normalizedWordsToParsing)))
        if Globals.verbosityLevel > 3:
            for j, nW in enumerate(self.normalizedWordsToParsing.keys()):
                print("  ", nW, self.normalizedWordsToParsing[nW])
                if j == 6: break
        if Globals.verbosityLevel > 2:
            print("analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".
                  format(len(self.lemmasToNormalizedWords)))
        if Globals.verbosityLevel > 3:
            for j, lem in enumerate(self.lemmasToNormalizedWords.keys()):
                print("  ", lem, self.lemmasToNormalizedWords[lem])
                if j == 6: break
        if 0:
            print("The following actual words have multiple normalized forms:")
            for j, aW in enumerate(self.actualWordsToNormalized.keys()):
                if len(self.actualWordsToNormalized[aW]) > 1:
                    print("  ", aW)
                    for entry in self.actualWordsToNormalized[aW]:
                        print("    ", entry[1],
                              self.normalizedWordsToParsing[entry[1]],
                              entry[0])
예제 #9
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        status = 0  # 1 = getting chapters, 2 = getting verse data
        lastLine, lineCount = '', 0
        BBB = lastBBB = None
        bookDetails = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1 and self.encoding.lower(
                ) == 'utf-8' and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        "      DrupalBible.load: Detected UTF-16 Byte Order Marker"
                    )
                    line = line[1:]  # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines

                #print ( 'DB file line is "' + line + '"' )
                if line[0] == '#': continue  # Just discard comment lines
                lastLine = line
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning(
                            "Unknown DrupalBible first line: {}".format(
                                repr(line)))

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else:  # Get the version name details
                        bits = line.split('|')
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else:  # Get the book name details
                        bits = line.split('|')
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert (bookShortName == bookCode)
                        BBBresult = Globals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                            bookCode)
                        BBB = BBBresult if isinstance(
                            BBBresult, str
                        ) else BBBresult[
                            0]  # Result can be string or list of strings (best guess first)
                        bookDetails[
                            BBB] = bookFullName, bookShortName, numChapters

                elif status == 2:  # Get the verse text
                    bits = line.split('|')
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    #chapterNumber, verseNumber = int( chapterNumberString ), int( verseNumberString )
                    if lineMark:
                        print(repr(lineMark))
                        halt
                    BBBresult = Globals.BibleBooksCodes.getBBBFromDrupalBibleCode(
                        bookCode)
                    BBB = BBBresult if isinstance(
                        BBBresult, str
                    ) else BBBresult[
                        0]  # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB:
                        if lastBBB is not None:
                            self.saveBook(thisBook)
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = "DrupalBible Bible Book object"
                        thisBook.objectTypeString = "DrupalBible"
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.appendLine('c', chapterNumberString)
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace('<', '\\it ').replace(
                        '>', '\\it*')
                    thisBook.appendLine('v',
                                        verseNumberString + ' ' + verseText)

                else:
                    halt

        # Save the final book
        self.saveBook(thisBook)
        self.doPostLoadProcessing()
예제 #10
0
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder '{}' is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                ignore = False
                for ending in filenameEndingsToIgnore:
                    if somethingUpper.endswith( ending): ignore=True; break
                if ignore: continue
                if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        #print( self.sourceFolder, foundFolders, len(foundFiles), foundFiles )
        numFound = 0
        for thisFilename in sorted( foundFiles ):
            firstLines = Globals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if Globals.verbosityLevel > 2: print( "USFXB (unexpected) first line was '{}' in {}".format( firstLines, thisFilename ) )
                continue
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if Globals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif looksHopeful and Globals.verbosityLevel > 2: print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.
        """
        if Globals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

                                #if Globals.verbosityLevel > 2: print( _("  It seems we have {}...").format( BBB ) )
                        #self.thisBook = BibleBook( self.name, BBB )
                        #self.thisBook.objectNameString = "OSIS XML Bible Book object"
                        #self.thisBook.objectTypeString = "OSIS"
                        #self.haveBook = True

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: '{}'.".format( self.sourceFilepath, errorString ) )
            return
        if Globals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (osis) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            Globals.checkXMLNoText( self.tree, location, '4f6h' )
            Globals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                #print( "attrib", repr(attrib), repr(value) )
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None
            for element in self.tree:
                #print( "element", repr(element.tag) )
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    Globals.checkXMLNoTail( element, sublocation, 'cff3' )
                    Globals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                    ##Globals.checkXMLNoSubelements( element, sublocation, '54f2' )
                    #Globals.checkXMLNoTail( element, sublocation, 'hd35' )
                    ## Process the attributes
                    #idField = bookStyle = None
                    #for attrib,value in element.items():
                        #if attrib=='id' or attrib=='code':
                            #idField = value # Should be USFM bookcode (not like bookReferenceCode which is BibleOrgSys BBB bookcode)
                            ##if idField != bookReferenceCode:
                            ##    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        #elif attrib=='style':
                            #bookStyle = value
                        #else:
                            #logging.warning( _("gfw2 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
                    #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if Globals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in '{}'".format( self.sourceFolder ) )
            for thisFilename in foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if Globals.verbosityLevel > 2: print( "Have possible USFX ID '{}'".format( USXId ) )
                            BBB = Globals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if Globals.verbosityLevel > 2: print( "BBB is '{}'".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    UBB = USFXXMLBibleBook( self.name, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if Globals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        Globals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self.name, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, C )
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.warning( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, C ):
        """
        Load the paragraph (p or q) container from the XML data file.
        """
        #if Globals.verbosityLevel > 3:
            #print( _("USFXXMLBible.loadParagraph: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        V = None
        pText = paragraphElement.text
        Globals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm':
                sfm = value
            elif attrib == 'level':
                level = value
            elif attrib == 'style':
                style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            #print( "element", repr(element.tag) )
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                Globals.checkXMLNoText( element, location, 'crc2' )
                Globals.checkXMLNoSubelements( element, location, 'lct3' )
                lastV, V = V, None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.appendLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                Globals.checkXMLNoText( element, location, 'lsc3' )
                Globals.checkXMLNoTail( element, location, 'mfy4' )
                Globals.checkXMLNoAttributes( element, location, 'bd24' )
                Globals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", paragraphLocation, C, V, repr(element.text) )
                self.loadFootnote( element, location )
            elif element.tag == 'x':
                #print( "USFX.loadParagraph Found xref at", paragraphLocation, C, V, repr(element.text) )
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'kdf0' )
                Globals.checkXMLNoAttributes( element, location, 'lkj1' )
                Globals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                #if target not in ('w','ior',): print( "ref sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
                #print( "Saved", '\\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if Globals.debugFlag: halt
            elif element.tag == 'milestone':
                print( "What is loadParagraph milestone?" )
                if Globals.debugFlag: halt
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.bookReferenceCode, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location ):
        """
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        Globals.checkXMLNoAttributes( element, location, 'sd12' )
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            #print( "element", repr(element.tag) )
            if subelement.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", sublocation, C, V, repr(subelement.text) )
                self.loadFootnote( subelement, sublocation )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.bookReferenceCode, C, V, location ) )
                halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'ff36' )
        Globals.checkXMLNoAttributes( element, location, 'cf35' )
        figDict = { 'description':'', 'catalog':'', 'size':'', 'location':'', 'copyright':'', 'caption':'', 'reference':'' }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            Globals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            Globals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            Globals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = ''
        for j,tag in enumerate( ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',) ):
            newString += ('' if j==0 else '|') + figDict[tag]
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'kg92' )
        Globals.checkXMLNoTail( element, location, 'ka92' )
        Globals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                #print( "table", sublocation )
                self.thisBook.appendLine( 'tr', '' )
                Globals.checkXMLNoText( subelement, sublocation, 'sg32' )
                Globals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                Globals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    Globals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    Globals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    marker = tag + (level if level else '')
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
            else:
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, self.thisBook.bookReferenceCode, C, V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location ):
        """
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadFootnote", repr(caller), repr(text), repr(tail), repr(marker), repr(fText), repr(fTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',):
                #print( "USFX.loadFootnote found", repr(caller), repr(marker), repr(fText), repr(fTail) )
            if Globals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',) )
            if marker=='ref':
                assert( fText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText ) )
                if marker[0] == 'f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2=='ref':
                            print( sub2location )
                            assert( not fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            print( "USFX.loadCrossreference", repr(caller), repr(text), repr(tail), repr(marker), repr(xText), repr(xTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','xo','xt',):
                #print( "USFX.loadCrossreference found", repr(caller), repr(marker), repr(xText), repr(xTail) )
            if Globals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            assert( not xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
예제 #11
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      UnboundBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'UB file line is "' + line + '"' )
                if line[0]=='#':
                    hashBits = line[1:].split( '\t' )
                    if len(hashBits)==2 and hashBits[1]: # We have some valid meta-data
                        if hashBits[0] == 'name': self.name = hashBits[1]
                        elif hashBits[0] == 'filetype': self.filetype = hashBits[1]
                        elif hashBits[0] == 'copyright': self.copyright = hashBits[1]
                        elif hashBits[0] == 'abbreviation': self.abbreviation = hashBits[1]
                        elif hashBits[0] == 'language': self.language = hashBits[1]
                        elif hashBits[0] == 'note': self.note = hashBits[1]
                        elif hashBits[0] == 'columns': self.columns = hashBits[1]
# Should some of these be placed into self.settingsDict???
                        logging.warning( "Unknown UnboundBible meta-data field '{}' = '{}'".format( hashBits[0], hashBits[1] ) )
                    continue # Just discard comment lines

                bits = line.split( '\t' )
                #print( self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith( 'lxx_a_parsing_' ):
                    logging.warning( _("Skipping bad '{}' line in {} {} {} {}:{}").format( line, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits ); halt

                if NRSVA_bookCode: assert( len(NRSVA_bookCode) == 3 )
                if NRSVA_chapterNumberString: assert( NRSVA_chapterNumberString.isdigit() )
                if NRSVA_verseNumberString: assert( NRSVA_verseNumberString.isdigit() )

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if Globals.debugFlag: assert( len(bookCode) == 3 )
                if Globals.debugFlag: assert( chapterNumberString.isdigit() )
                if Globals.debugFlag: assert( verseNumberString.isdigit() )

                if subverseNumberString:
                    logging.warning( _("subverseNumberString '{}' in {} {} {}:{}").format( subverseNumberString, BBB, bookCode, chapterNumberString, verseNumberString ) )

                vText = vText.strip() # Remove leading and trailing spaces
                if not vText: continue # Just ignore blank verses I think
                if vText == '+': continue # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )
                if sequenceNumberString:
                    if Globals.debugFlag: assert( sequenceNumberString.isdigit() )
                    sequenceNumber = int( sequenceNumberString )
                    if Globals.debugFlag: assert( sequenceNumber > lastSequence or \
                        self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) ) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode: # We've started a new book
                    if lastBookCode != -1: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = Globals.BibleBooksCodes.getBBBFromUnboundBibleCode( bookCode )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "Unbound Bible Book object"
                    thisBook.objectTypeString = "Unbound"
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()
예제 #12
0
class USFXXMLBible( Bible ):
    """
    Class to load and manipulate USFX Bibles.

    """
    def __init__( self, sourceFolder, givenName=None, encoding='utf-8' ):
        """
        Create the internal USFX Bible object.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "USFX XML Bible object"
        self.objectTypeString = "USFX"

        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding # Remember our parameters

        # Now we can set our object variables
        self.name = self.givenName
        if not self.name: self.name = os.path.basename( self.sourceFolder )
        if not self.name: self.name = os.path.basename( self.sourceFolder[:-1] ) # Remove the final slash
        if not self.name: self.name = "USFX Bible"
        if self.name.endswith( '_usfx' ): self.name = self.name[:-5] # Remove end of name for Haiola projects

        # Do a preliminary check on the readability of our folder
        if not os.access( self.sourceFolder, os.R_OK ):
            logging.error( "USFXXMLBible: Folder '{}' is unreadable".format( self.sourceFolder ) )

        # Do a preliminary check on the contents of our folder
        self.sourceFilename = self.sourceFilepath = None
        foundFiles, foundFolders = [], []
        for something in os.listdir( self.sourceFolder ):
            somepath = os.path.join( self.sourceFolder, something )
            if os.path.isdir( somepath ): foundFolders.append( something )
            elif os.path.isfile( somepath ):
                somethingUpper = something.upper()
                somethingUpperProper, somethingUpperExt = os.path.splitext( somethingUpper )
                ignore = False
                for ending in filenameEndingsToIgnore:
                    if somethingUpper.endswith( ending): ignore=True; break
                if ignore: continue
                if not somethingUpperExt[1:] in extensionsToIgnore: # Compare without the first dot
                    foundFiles.append( something )
            else: logging.error( "Not sure what '{}' is in {}!".format( somepath, self.sourceFolder ) )
        if foundFolders: logging.info( "USFXXMLBible: Surprised to see subfolders in '{}': {}".format( self.sourceFolder, foundFolders ) )
        if not foundFiles:
            if Globals.verbosityLevel > 0: print( "USFXXMLBible: Couldn't find any files in '{}'".format( self.sourceFolder ) )
            return # No use continuing

        #print( self.sourceFolder, foundFolders, len(foundFiles), foundFiles )
        numFound = 0
        for thisFilename in sorted( foundFiles ):
            firstLines = Globals.peekIntoFile( thisFilename, sourceFolder, numLines=3 )
            if not firstLines or len(firstLines)<2: continue
            if not firstLines[0].startswith( '<?xml version="1.0"' ) \
            and not firstLines[0].startswith( '\ufeff<?xml version="1.0"' ): # same but with BOM
                if Globals.verbosityLevel > 2: print( "USFXB (unexpected) first line was '{}' in {}".format( firstLines, thisFilename ) )
                continue
            if "<usfx " not in firstLines[0]:
                continue
            lastFilenameFound = thisFilename
            numFound += 1
        if numFound:
            if Globals.verbosityLevel > 2: print( "USFXXMLBible got", numFound, sourceFolder, lastFilenameFound )
            if numFound == 1:
                self.sourceFilename = lastFilenameFound
                self.sourceFilepath = os.path.join( self.sourceFolder, self.sourceFilename )
        elif looksHopeful and Globals.verbosityLevel > 2: print( "    Looked hopeful but no actual files found" )
    # end of USFXXMLBible.__init_


    def load( self ):
        """
        Load the XML data file -- we should already know the filepath.
        """
        if Globals.verbosityLevel > 1:
            print( _("USFXXMLBible: Loading {} from {}...").format( self.name, self.sourceFolder ) )

                                #if Globals.verbosityLevel > 2: print( _("  It seems we have {}...").format( BBB ) )
                        #self.thisBook = BibleBook( self, BBB )
                        #self.thisBook.objectNameString = "OSIS XML Bible Book object"
                        #self.thisBook.objectTypeString = "OSIS"
                        #self.haveBook = True

        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError:
            errorString = sys.exc_info()[1]
            logging.critical( "USFXXMLBible.load: failed loading the xml file {}: '{}'.".format( self.sourceFilepath, errorString ) )
            return
        if Globals.debugFlag: assert( len ( self.tree ) ) # Fail here if we didn't load anything at all

        # Find the main (osis) container
        if self.tree.tag == 'usfx':
            location = "USFX file"
            Globals.checkXMLNoText( self.tree, location, '4f6h' )
            Globals.checkXMLNoTail( self.tree, location, '1wk8' )
            # Process the attributes first
            self.schemaLocation = None
            for attrib,value in self.tree.items():
                #print( "attrib", repr(attrib), repr(value) )
                if attrib.endswith("SchemaLocation"):
                    self.schemaLocation = value
                else:
                    logging.warning( "fv6g Unprocessed {} attribute ({}) in {}".format( attrib, value, location ) )
            BBB = C = V = None
            for element in self.tree:
                #print( "element", repr(element.tag) )
                sublocation = element.tag + " " + location
                if element.tag == 'languageCode':
                    self.languageCode = element.text
                    Globals.checkXMLNoTail( element, sublocation, 'cff3' )
                    Globals.checkXMLNoAttributes( element, sublocation, 'des1' )
                    Globals.checkXMLNoSubelements( element, sublocation, 'dwf2' )
                elif element.tag == 'book':
                    self.loadBook( element )
                    ##Globals.checkXMLNoSubelements( element, sublocation, '54f2' )
                    #Globals.checkXMLNoTail( element, sublocation, 'hd35' )
                    ## Process the attributes
                    #idField = bookStyle = None
                    #for attrib,value in element.items():
                        #if attrib=='id' or attrib=='code':
                            #idField = value # Should be USFM bookcode (not like BBB which is BibleOrgSys BBB bookcode)
                            ##if idField != BBB:
                            ##    logging.warning( _("Unexpected book code ({}) in {}").format( idField, sublocation ) )
                        #elif attrib=='style':
                            #bookStyle = value
                        #else:
                            #logging.warning( _("gfw2 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                else:
                    logging.warning( _("dbw1 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, sublocation ) )
                    #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )

        if not self.books: # Didn't successfully load any regularly named books -- maybe the files have weird names??? -- try to be intelligent here
            if Globals.verbosityLevel > 2:
                print( "USFXXMLBible.load: Didn't find any regularly named USFX files in '{}'".format( self.sourceFolder ) )
            for thisFilename in foundFiles:
                # Look for BBB in the ID line (which should be the first line in a USFX file)
                isUSFX = False
                thisPath = os.path.join( self.sourceFolder, thisFilename )
                with open( thisPath ) as possibleUSXFile: # Automatically closes the file when done
                    for line in possibleUSXFile:
                        if line.startswith( '\\id ' ):
                            USXId = line[4:].strip()[:3] # Take the first three non-blank characters after the space after id
                            if Globals.verbosityLevel > 2: print( "Have possible USFX ID '{}'".format( USXId ) )
                            BBB = Globals.BibleBooksCodes.getBBBFromUSFM( USXId )
                            if Globals.verbosityLevel > 2: print( "BBB is '{}'".format( BBB ) )
                            isUSFX = True
                        break # We only look at the first line
                if isUSFX:
                    UBB = USFXXMLBibleBook( self, BBB )
                    UBB.load( self.sourceFolder, thisFilename, self.encoding )
                    UBB.validateMarkers()
                    print( UBB )
                    self.books[BBB] = UBB
                    # Make up our book name dictionaries while we're at it
                    assumedBookNames = UBB.getAssumedBookNames()
                    for assumedBookName in assumedBookNames:
                        self.BBBToNameDict[BBB] = assumedBookName
                        assumedBookNameLower = assumedBookName.lower()
                        self.bookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        self.combinedBookNameDict[assumedBookNameLower] = BBB # Store the deduced book name (just lower case)
                        if ' ' in assumedBookNameLower: self.combinedBookNameDict[assumedBookNameLower.replace(' ','')] = BBB # Store the deduced book name (lower case without spaces)
            if self.books: print( "USFXXMLBible.load: Found {} irregularly named USFX files".format( len(self.books) ) )
        self.doPostLoadProcessing()
    # end of USFXXMLBible.load


    def loadBook( self, bookElement ):
        """
        Load the book container from the XML data file.
        """
        if Globals.verbosityLevel > 3:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( self.name, self.sourceFolder ) )
        assert( bookElement.tag == 'book' )
        mainLocation = self.name + " USFX book"

        # Process the attributes first
        bookCode = None
        for attrib,value in bookElement.items():
            if attrib == 'id':
                bookCode = value
            else:
                logging.warning( "bce3 Unprocessed {} attribute ({}) in {}".format( attrib, value, mainLocation ) )
        BBB = Globals.BibleBooksCodes.getBBBFromUSFM( bookCode )
        mainLocation = "{} USFX {} book".format( self.name, BBB )
        if Globals.verbosityLevel > 2:
            print( _("USFXXMLBible.loadBook: Loading {} from {}...").format( BBB, self.name ) )
        Globals.checkXMLNoText( self.tree, mainLocation, '4f6h' )
        Globals.checkXMLNoTail( self.tree, mainLocation, '1wk8' )

        # Now create our actual book
        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = "USFX XML Bible Book object"
        self.thisBook.objectTypeString = "USFX"

        C = V = '0'
        for element in bookElement:
            #print( "element", repr(element.tag) )
            location = "{} of {} {}:{}".format( element.tag, mainLocation, C, V )
            if element.tag == 'id':
                idText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'vsg3' )
                Globals.checkXMLNoSubelements( element, location, 'ksq2' )
                for attrib,value in element.items():
                    if attrib == 'id':
                        assert( value == bookCode )
                    else:
                        logging.warning( _("vsg4 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'id', bookCode + ((' '+idText) if idText else '') )
            elif element.tag == 'ide':
                ideText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'jsa0' )
                Globals.checkXMLNoSubelements( element, location, 'ls01' )
                charset = None
                for attrib,value in element.items():
                    if attrib == 'charset': charset = value
                    else:
                        logging.warning( _("jx53 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'ide', charset + ((' '+ideText) if ideText else '') )
            elif element.tag == 'h':
                hText = element.text
                Globals.checkXMLNoTail( element, location, 'dj35' )
                Globals.checkXMLNoAttributes( element, location, 'hs35' )
                Globals.checkXMLNoSubelements( element, location, 'hs32' )
                self.thisBook.appendLine( 'h', clean(hText) )
            elif element.tag == 'toc':
                tocText = element.text
                Globals.checkXMLNoTail( element, location, 'ss13' )
                Globals.checkXMLNoSubelements( element, location, 'js13' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems compulsory
                        level = value
                    else:
                        logging.warning( _("dg36 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'toc'+level, clean(tocText) )
            elif element.tag == 'c':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' ) # This is a milestone
                for attrib,value in element.items():
                    if attrib == 'id':
                        C, V = value, '0'
                    else:
                        logging.warning( _("hj52 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                self.thisBook.appendLine( 'c', C )
            elif element.tag == 's':
                sText = clean( element.text )
                Globals.checkXMLNoTail( element, location, 'wxg0' )
                level = None
                for attrib,value in element.items():
                    if attrib == 'level': # Seems optional
                        level = value
                    else:
                        logging.warning( _("bdy6 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                marker = 's'
                if level: marker += level
                self.thisBook.appendLine( marker, sText )
                for subelement in element:
                    #print( "subelement", repr(subelement.tag) )
                    sublocation = subelement.tag + " of " + location
                    if subelement.tag == 'f':
                        self.loadFootnote( subelement, sublocation )
                    elif subelement.tag == 'x':
                        self.loadCrossreference( subelement, sublocation )
                    elif subelement.tag == 'fig':
                        self.loadFigure( subelement, sublocation )
                    elif subelement.tag == 'table':
                        self.loadTable( subelement, sublocation )
                    elif subelement.tag in ('add','it','bd','bdit','sc',):
                        self.loadCharacterFormatting( subelement, sublocation )
                    elif subelement.tag == 'optionalLineBreak':
                        print( "What is loadBook optionalLineBreak?" )
                    else:
                        logging.warning( _("jx9q Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, BBB, C, V, sublocation ) )
            elif element.tag in ('p','q','d',):
                V = self.loadParagraph( element, location, C )
            elif element.tag == 'b':
                Globals.checkXMLNoText( element, location, 'ks35' )
                Globals.checkXMLNoTail( element, location, 'gs35' )
                Globals.checkXMLNoAttributes( element, location, 'nd04' )
                Globals.checkXMLNoSubelements( element, location, 'kdr3' )
                self.thisBook.appendLine( 'b', '' )
            elif element.tag in ('cl','cp'): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'od01' )
                Globals.checkXMLNoAttributes( element, location, 'us91' )
                Globals.checkXMLNoSubelements( element, location, 'gd92' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'table':
                self.loadTable( element, location )
            else:
                logging.critical( _("caf2 Unprocessed {} element after {} {}:{} in {}").format( element.tag, BBB, C, V, location ) )
                #self.addPriorityError( 1, c, v, _("Unprocessed {} element").format( element.tag ) )
                if Globals.debugFlag: halt
        self.saveBook( self.thisBook )
    # end of USFXXMLBible.loadBook


    def loadParagraph( self, paragraphElement, paragraphLocation, C ):
        """
        Load the paragraph (p or q) container from the XML data file.
        """
        #if Globals.verbosityLevel > 3:
            #print( _("USFXXMLBible.loadParagraph: Loading {} from {}...").format( self.name, self.sourceFolder ) )

        V = None
        pText = paragraphElement.text
        Globals.checkXMLNoTail( paragraphElement, paragraphLocation, 'vsg7' )

        # Process the attributes first
        sfm = level = style = None
        for attrib,value in paragraphElement.items():
            if attrib == 'sfm':
                sfm = value
            elif attrib == 'level':
                level = value
            elif attrib == 'style':
                style = value
            else:
                logging.warning( "vfh4 Unprocessed {} attribute ({}) in {}".format( attrib, value, paragraphLocation ) )

        for element in paragraphElement:
            location = element.tag + " of " + paragraphLocation
            #print( "element", repr(element.tag) )
            if element.tag == 'v': # verse milestone
                vTail = clean( element.tail ) # Main verse text
                Globals.checkXMLNoText( element, location, 'crc2' )
                Globals.checkXMLNoSubelements( element, location, 'lct3' )
                lastV, V = V, None
                for attrib,value in element.items():
                    if attrib == 'id':
                        V = value
                    else:
                        logging.warning( _("cbs2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                assert( V is not None )
                assert( V )
                self.thisBook.appendLine( 'v', V + ((' '+vTail) if vTail else '' ) )
            elif element.tag == 've': # verse end milestone -- we can just ignore this
                Globals.checkXMLNoText( element, location, 'lsc3' )
                Globals.checkXMLNoTail( element, location, 'mfy4' )
                Globals.checkXMLNoAttributes( element, location, 'bd24' )
                Globals.checkXMLNoSubelements( element, location, 'ks35' )
            elif element.tag == 'fig':
                self.loadFigure( element, location )
            elif element.tag == 'table':
                self.loadTable( element, location )
            elif element.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", paragraphLocation, C, V, repr(element.text) )
                self.loadFootnote( element, location )
            elif element.tag == 'x':
                #print( "USFX.loadParagraph Found xref at", paragraphLocation, C, V, repr(element.text) )
                self.loadCrossreference( element, location )
            elif element.tag in ('add','nd','wj','rq','sig','sls','bk','k','tl','vp','pn','qs','qt','em','it','bd','bdit','sc','no',): # character formatting
                self.loadCharacterFormatting( element, location )
            elif element.tag == 'cs': # character style -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'kf92' )
                sfm = None
                for attrib,value in element.items():
                    if attrib == 'sfm': sfm = value
                    else:
                        logging.warning( _("sh29 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                if sfm not in ('w','ior',): print( "cs sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( sfm, text, sfm, (' '+tail) if tail else '' ) )
            elif element.tag in ('cp',): # Simple single-line paragraph-level markers
                marker, text = element.tag, clean(element.text)
                Globals.checkXMLNoTail( element, location, 'kdf0' )
                Globals.checkXMLNoAttributes( element, location, 'lkj1' )
                Globals.checkXMLNoSubelements( element, location, 'da13' )
                self.thisBook.appendLine( marker, text )
            elif element.tag == 'ref': # encoded reference -- seems like a USFX hack
                text, tail = clean(element.text), clean(element.tail)
                Globals.checkXMLNoSubelements( element, location, 'bd83' )
                target = None
                for attrib,value in element.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("be83 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                #if target not in ('w','ior',): print( "ref sfm got", repr(sfm) )
                self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
                #print( "Saved", '\\{} {}\\{}*{}{}'.format( element.tag, target, element.tag, text, (' '+tail) if tail else '' ) )
            elif element.tag == 'optionalLineBreak':
                print( "What is loadParagraph optionalLineBreak?" )
                if Globals.debugFlag: halt
            elif element.tag == 'milestone':
                print( "What is loadParagraph milestone?" )
                if Globals.debugFlag: halt
            else:
                logging.warning( _("df45 Unprocessed {} element after {} {}:{} in {}").format( repr(element.tag), self.thisBook.BBB, C, V, location ) )
        return V
    # end of USFXXMLBible.loadParagraph


    def loadCharacterFormatting( self, element, location ):
        """
        """
        marker, text, tail = element.tag, clean(element.text), clean(element.tail)
        Globals.checkXMLNoAttributes( element, location, 'sd12' )
        self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            #print( "element", repr(element.tag) )
            if subelement.tag == 'f':
                #print( "USFX.loadParagraph Found footnote at", sublocation, C, V, repr(subelement.text) )
                self.loadFootnote( subelement, sublocation )
            else:
                logging.warning( _("sf31 Unprocessed {} element after {} {}:{} in {}").format( repr(subelement.tag), self.thisBook.BBB, C, V, location ) )
                halt
        self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadCharacterFormatting


    def loadFigure( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'ff36' )
        Globals.checkXMLNoAttributes( element, location, 'cf35' )
        figDict = { 'description':'', 'catalog':'', 'size':'', 'location':'', 'copyright':'', 'caption':'', 'reference':'' }
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            figTag, figText = subelement.tag, clean(subelement.text)
            assert( figTag in figDict )
            figDict[figTag] = '' if figText is None else figText
            Globals.checkXMLNoTail( subelement, sublocation, 'jkf5' )
            Globals.checkXMLNoAttributes( subelement, sublocation, 'ld18' )
            Globals.checkXMLNoSubelements( subelement, sublocation, 'hb46' )
        newString = ''
        for j,tag in enumerate( ('description', 'catalog', 'size', 'location', 'copyright', 'caption', 'reference',) ):
            newString += ('' if j==0 else '|') + figDict[tag]
        figTail = clean( element.tail )
        self.thisBook.appendToLastLine( ' \\fig {}\\fig*{}'.format( newString, (' '+figTail) if figTail else '' ) )
    # end of USFXXMLBible.loadFigure


    def loadTable( self, element, location ):
        """
        """
        Globals.checkXMLNoText( element, location, 'kg92' )
        Globals.checkXMLNoTail( element, location, 'ka92' )
        Globals.checkXMLNoAttributes( element, location, 'ks63' )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            if subelement.tag == 'tr':
                #print( "table", sublocation )
                self.thisBook.appendLine( 'tr', '' )
                Globals.checkXMLNoText( subelement, sublocation, 'sg32' )
                Globals.checkXMLNoTail( subelement, sublocation, 'dh82' )
                Globals.checkXMLNoAttributes( subelement, sublocation, 'mniq' )
                for sub2element in subelement:
                    sub2location = sub2element.tag + " of " + sublocation
                    tag, text = sub2element.tag, clean(sub2element.text)
                    assert( tag in ('th', 'thr', 'tc', 'tcr',) )
                    Globals.checkXMLNoTail( sub2element, sub2location, 'ah82' )
                    Globals.checkXMLNoSubelements( sub2element, sub2location, 'ka63' )
                    level = None
                    for attrib,value in sub2element.items():
                        if attrib == 'level': level = value
                        else:
                            logging.warning( _("vx25 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
                    marker = tag + (level if level else '')
                    self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, text ) )
            else:
                logging.warning( _("kv64 Unprocessed {} element after {} {}:{} in {}").format( subelement.tag, self.thisBook.BBB, C, V, sublocation ) )
    # end of USFXXMLBible.loadTable


    def loadFootnote( self, element, location ):
        """
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("dg35 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\f {}{}'.format( caller, (' '+text) if text else '' ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, fText, fTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadFootnote", repr(caller), repr(text), repr(tail), repr(marker), repr(fText), repr(fTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',):
                #print( "USFX.loadFootnote found", repr(caller), repr(marker), repr(fText), repr(fTail) )
            if Globals.debugFlag: assert( marker in ('ref','fr','ft','fq','fv','fk','fqa','it','bd','rq',) )
            if marker=='ref':
                assert( fText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 'ls13' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("gs35 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, fText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'dq54' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, fText ) )
                if marker[0] == 'f': # Starts with f, e.g., fr, ft
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, fText2, fTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'js72' )
                        if marker2=='ref':
                            print( sub2location )
                            assert( not fText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("hd52 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if fTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, fTail ) )
        self.thisBook.appendToLastLine( '\\f*{}'.format( (' '+tail) if tail else '' ) )
    # end of USFXXMLBible.loadFootnote


    def loadCrossreference( self, element, location ):
        """
        Has to handle: <x caller="+"><ref tgt="EXO.30.12">Exodus 30:12</ref></x>
        """
        text, tail = clean(element.text), clean(element.tail)
        caller = None
        for attrib,value in element.items():
            if attrib == 'caller':
                caller = value
            else:
                logging.warning( _("fhj2 Unprocessed {} attribute ({}) in {}").format( attrib, value, location ) )
        self.thisBook.appendToLastLine( ' \\x {}'.format( caller ) )
        for subelement in element:
            sublocation = subelement.tag + " of " + location
            marker, xText, xTail = subelement.tag, clean(subelement.text), clean(subelement.tail)
            #print( "USFX.loadCrossreference", repr(caller), repr(text), repr(tail), repr(marker), repr(xText), repr(xTail) )
            #if Globals.verbosityLevel > 0 and marker not in ('ref','xo','xt',):
                #print( "USFX.loadCrossreference found", repr(caller), repr(marker), repr(xText), repr(xTail) )
            if Globals.debugFlag: assert( marker in ('ref','xo','xt',) )
            if marker=='ref':
                assert( xText )
                Globals.checkXMLNoSubelements( subelement, sublocation, 's1sd' )
                target = None
                for attrib,value in subelement.items():
                    if attrib == 'tgt': target = value
                    else:
                        logging.warning( _("aj41 Unprocessed {} attribute ({}) in {}").format( attrib, value, sublocation ) )
                if target:
                    self.thisBook.appendToLastLine( ' \\{} {}\\{}*{}'.format( marker, target, marker, xText ) )
                else: halt
            else:
                Globals.checkXMLNoAttributes( subelement, sublocation, 'sc35' )
                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker, xText ) )
                if marker[0] == 'x': # Starts with x, e.g., xo, xt
                    for sub2element in subelement:
                        sub2location = sub2element.tag + " of " + sublocation
                        marker2, xText2, xTail2 = sub2element.tag, clean(sub2element.text), clean(sub2element.tail)
                        Globals.checkXMLNoSubelements( sub2element, sub2location, 'fs63' )
                        if marker2=='ref':
                            assert( not xText2 )
                            target = None
                            for attrib,value in sub2element.items():
                                if attrib == 'tgt': target = value
                                else:
                                    logging.warning( _("gs34 Unprocessed {} attribute ({}) in {}").format( attrib, value, sub2location ) )
                            if target:
                                self.thisBook.appendToLastLine( ' \\{} {}'.format( marker2, target ) )
                            else: halt
                        else: halt
                else: halt
            if xTail:
                self.thisBook.appendToLastLine( '\\{}*{}'.format( marker, xTail ) )
        self.thisBook.appendToLastLine( '\\x*{}'.format( (' '+tail) if tail else '' ) )
예제 #13
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      VPLBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'VLP file line is "' + line + '"' )
                if line[0]=='#': continue # Just discard comment lines

                bits = line.split( ' ', 2 )
                #print( self.givenName, BBB, bits )
                if len(bits) == 3 and ':' in bits[1]:
                    bookCode, CVString, vText = bits
                    chapterNumberString, verseNumberString = CVString.split( ':' )
                else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )

                if not bookCode and not chapterNumberString and not verseNumberString:
                    print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if Globals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                if Globals.debugFlag: assert( chapterNumberString.isdigit() )
                if Globals.debugFlag: assert( verseNumberString.isdigit() )
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookCode != lastBookCode: # We've started a new book
                    if lastBookCode != -1: # Better save the last book
                        self.saveBook( thisBook )
                    if bookCode == 'Ge': BBB = 'GEN'
                    elif bookCode == 'Le': BBB = 'LEV'
                    elif bookCode == 'Jud': BBB = 'JDG'
                    elif bookCode == 'Es': BBB = 'EST'
                    elif bookCode == 'Pr': BBB = 'PRO'
                    elif bookCode == 'So': BBB = 'SNG'
                    elif bookCode == 'La': BBB = 'LAM'
                    elif bookCode == 'Jude': BBB = 'JDE'
                    else: BBB = Globals.BibleBooksCodes.getBBB( bookCode )  # Try to guess
                    assert( BBB )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "VPL Bible Book object"
                    thisBook.objectTypeString = "VPL"
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle special formatting
                #   [brackets] are for Italicized words
                #   <brackets> are for the Words of Christ in Red
                #   «brackets»  are for the Titles in the Book  of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0]=='«':
                    assert( BBB=='PSA' and verseNumberString=='1' )
                    vBits = vText[1:].split( '»' )
                    #print( "vBits", vBits )
                    thisBook.appendLine( 'd', vBits[0] ) # Psalm title
                    vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()
예제 #14
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        lastLine, lineCount = '', 0
        BBB = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      CSVBible.load: Detected UTF-16 Byte Order Marker" )
                #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if line == ' ':
                    continue  # Handle special case which has blanks on every second line -- HACK
                lastLine = line
                #print ( "CSV file line {} is {}".format( lineCount, repr(line) ) )
                if line[0] == '#': continue  # Just discard comment lines
                if lineCount == 1:
                    if line.startswith('"Book",'):
                        quoted = True
                        continue  # Just discard header line
                    elif line.startswith('Book,'):
                        quoted = False
                        continue  # Just discard header line

                bits = line.split(',', 3)
                #print( lineCount, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bString, chapterNumberString, verseNumberString, vText = bits
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )
                else:
                    print("Unexpected number of bits", self.givenName, BBB,
                          bString, chapterNumberString, verseNumberString,
                          vText, len(bits), bits)

                # Remove quote marks from these strings
                if quoted:
                    if len(bString) >= 2 and bString[0] == bString[
                            -1] and bString[0] in '"\'':
                        bString = bString[1:-1]
                    if len(chapterNumberString) >= 2 and chapterNumberString[
                            0] == chapterNumberString[
                                -1] and chapterNumberString[0] in '"\'':
                        chapterNumberString = chapterNumberString[1:-1]
                    if len(verseNumberString) >= 2 and verseNumberString[
                            0] == verseNumberString[-1] and verseNumberString[
                                0] in '"\'':
                        verseNumberString = verseNumberString[1:-1]
                    if len(vText) >= 2 and vText[0] == vText[-1] and vText[
                            0] in '"\'':
                        vText = vText[1:-1]
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )

                #if not bookCode and not chapterNumberString and not verseNumberString:
                #print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #continue
                #if Globals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                #if Globals.debugFlag: assert( chapterNumberString.isdigit() )
                #if Globals.debugFlag: assert( verseNumberString.isdigit() )
                bookNumber = int(bString)
                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)

                if bookNumber != lastBookNumber:  # We've started a new book
                    if lastBookNumber != -1:  # Better save the last book
                        self.saveBook(thisBook)
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumber)  # Try to guess
                    assert (BBB)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if Globals.debugFlag:
                        assert (chapterNumber > lastChapterNumber
                                or BBB == 'ESG'
                                )  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookNumber,
                                chapterNumberString, verseNumberString))
                    thisBook.appendLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                vTextOriginal = vText
                # First do special characters
                vText = vText.replace('\\ldblquote', '“').replace(
                    '\\rdblquote', '”').replace('\\lquote',
                                                '‘').replace('\\rquote', '’')
                vText = vText.replace('\\emdash', '—').replace('\\endash', '–')
                # Now do Unicode characters
                while True:  # Find patterns like \\'d3
                    match = re.search(r"\\'[0-9a-f][0-9a-f]", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end()],
                            16)  # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]
                while True:  # Find patterns like \\u253?
                    match = re.search(r"\\u[1-2][0-9][0-9]\?", vText)
                    if not match: break
                    i = int(vText[match.start() + 2:match.end() -
                                  1])  # Convert three digits to decimal
                    vText = vText[:match.start()] + chr(
                        i) + vText[match.end():]
                #if vText != vTextOriginal: print( repr(vTextOriginal) ); print( repr(vText) )

                ## Handle special formatting
                ##   [brackets] are for Italicized words
                ##   <brackets> are for the Words of Christ in Red
                ##   «brackets»  are for the Titles in the Book  of Psalms.
                #vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                #.replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                #if vText and vText[0]=='«':
                #assert( BBB=='PSA' and verseNumberString=='1' )
                #vBits = vText[1:].split( '»' )
                ##print( "vBits", vBits )
                #thisBook.appendLine( 'd', vBits[0] ) # Psalm title
                #vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '&lt;') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.appendLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook(thisBook)
        self.doPostLoadProcessing()
예제 #15
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      CSVBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if line==' ': continue # Handle special case which has blanks on every second line -- HACK
                lastLine = line
                #print ( "CSV file line {} is {}".format( lineCount, repr(line) ) )
                if line[0]=='#': continue # Just discard comment lines
                if lineCount==1:
                    if line.startswith( '"Book",' ):
                        quoted = True
                        continue # Just discard header line
                    elif line.startswith( 'Book,' ):
                        quoted = False
                        continue # Just discard header line

                bits = line.split( ',', 3 )
                #print( lineCount, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bString, chapterNumberString, verseNumberString, vText = bits
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )
                else: print( "Unexpected number of bits", self.givenName, BBB, bString, chapterNumberString, verseNumberString, vText, len(bits), bits )

                # Remove quote marks from these strings
                if quoted:
                    if len(bString)>=2 and bString[0]==bString[-1] and bString[0] in '"\'': bString = bString[1:-1]
                    if len(chapterNumberString)>=2 and chapterNumberString[0]==chapterNumberString[-1] and chapterNumberString[0] in '"\'': chapterNumberString = chapterNumberString[1:-1]
                    if len(verseNumberString)>=2 and verseNumberString[0]==verseNumberString[-1] and verseNumberString[0] in '"\'': verseNumberString = verseNumberString[1:-1]
                    if len(vText)>=2 and vText[0]==vText[-1] and vText[0] in '"\'': vText = vText[1:-1]
                    #print( "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )

                #if not bookCode and not chapterNumberString and not verseNumberString:
                    #print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #continue
                #if Globals.debugFlag: assert( 2  <= len(bookCode) <= 4 )
                #if Globals.debugFlag: assert( chapterNumberString.isdigit() )
                #if Globals.debugFlag: assert( verseNumberString.isdigit() )
                bookNumber = int( bString )
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookNumber != lastBookNumber: # We've started a new book
                    if lastBookNumber != -1: # Better save the last book
                        self.saveBook( thisBook )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )  # Try to guess
                    assert( BBB )
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = "CSV Bible Book object"
                    thisBook.objectTypeString = "CSV"
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                vTextOriginal = vText
                # First do special characters
                vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
                vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
                # Now do Unicode characters
                while True: # Find patterns like \\'d3
                    match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                while True: # Find patterns like \\u253?
                    match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                #if vText != vTextOriginal: print( repr(vTextOriginal) ); print( repr(vText) )

                ## Handle special formatting
                ##   [brackets] are for Italicized words
                ##   <brackets> are for the Words of Christ in Red
                ##   «brackets»  are for the Titles in the Book  of Psalms.
                #vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    #.replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                #if vText and vText[0]=='«':
                    #assert( BBB=='PSA' and verseNumberString=='1' )
                    #vBits = vText[1:].split( '»' )
                    ##print( "vBits", vBits )
                    #thisBook.appendLine( 'd', vBits[0] ) # Psalm title
                    #vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()
예제 #16
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        connection.row_factory = sqlite3.Row # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute( 'select * from Details' )
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

        # Create the first book
        thisBook = BibleBook( self.name, BBB )
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = cursor.fetchone()
                line = row[0]
            except: # This reference is missing
                #print( "something wrong at", BBB, C, V )
                #if Globals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    while line and line[-1] in '\r\n': line = line[:-1]
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                        self.saveBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self.name, BBB )
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.appendLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.appendLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.appendLine( 'p', '' )
                ourGlobals['haveParagraph'] = False
        cursor.close()
        self.doPostLoadProcessing()
예제 #17
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        def decodeVerse( encodedVerseString ):
            """
            Decodes the verse which has @ format codes.
            """
            verseString = encodedVerseString
            if verseString.startswith( '@@' ): # This simply means that encoding follows
                verseString = verseString[2:]
            if verseString.startswith( '@@' ): # This simply means that encoding follows
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace( '@^', '\\\\p ' )
            verseString = verseString.replace( '@0', '\\\\m ' )
            verseString = verseString.replace( '@1', '\\\\q1 ' ).replace( '@2', '\\\\q2 ' ).replace( '@3', '\\\\q3 ' ).replace( '@4', '\\q4 ' )
            verseString = verseString.replace( '@8', '\\\\m ' )
            # Character markers (marked now with single backslash)
            verseString = verseString.replace( '@6', '\\wj ' ).replace( '@5', '\\wj*' )
            verseString = verseString.replace( '@9', '\\add ' ).replace( '@7', '\\add*' ) # or \\i ???
            verseString = re.sub( r'@<f([0-9])@>@/', r'\\ff\1', verseString )
            verseString = re.sub( r'@<x([0-9])@>@/', r'\\xx\1', verseString )
            #print( repr( verseString ) )
            assert( '@' not in verseString )
            return verseString
        # end of decodeVerse

        # Read all the lines into bookDict
        lastLine, lineCount = '', 0
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = OrderedDict(), OrderedDict(), {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = ''
        lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      YETBible.load: Detected UTF-16 Byte Order Marker" )
                    #line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                lastLine = line
                #print ( 'YETBible file line is "' + line + '"' )

                bits = line.split( '\t' )
                #print( self.givenName, BBB, bits )
                if bits[0] == 'info':
                    assert( len(bits) == 3 )
                    if bits[1] == 'shortName':
                        shortName = bits[2]
                        self.name = shortName
                    elif bits[1] == 'longName':
                        longName = bits[2]
                    elif bits[1] == 'description':
                        description = bits[2]
                    elif bits[1] == 'locale':
                        locale = bits[2]
                        assert( 2 <= len(locale) <= 3 )
                        if locale == 'in': locale = 'id' # Fix a quirk in the locale encoding
                    else:
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, bookCode, chapterNumberString, verseNumberString ) )
                    continue
                elif bits[0] == 'book_name':
                    assert( 3 <= len(bits) <= 4 )
                    thisBBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bits[1] )
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                    continue
                elif bits[0] == 'verse':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    #print( "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) )
                    if BBB != lastBBB: # We have a new book
                        if lastBBB is not None: # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert( BBB in bookNameDict )
                        bookLines = OrderedDict() # Keys are (C,V) strings
                    verseString = decodeVerse( encodedVerseString )
                    bookLines[(chapterNumberString,verseNumberString)] = verseString # Just store it for now
                    lastBBB = BBB
                    continue
                elif bits[0] == 'pericope':
                    assert( len(bits) == 5 )
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    headingString = encodedHeadingString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    #print( repr(encodedHeadingString), repr(headingString) )
                    assert( '@' not in headingString )
                    headingDict[(BBB,chapterNumberString,verseNumberString)] = headingString, [] # Blank refList
                    continue
                elif bits[0] == 'parallel': # These lines optionally follow pericope lines
                    assert( len(bits) == 2 )
                    heading, refList = headingDict[(BBB,chapterNumberString,verseNumberString)]
                    refList.append( bits[1] )
                    #print( "parallel2", repr(heading), refList )
                    headingDict[(BBB,chapterNumberString,verseNumberString)] = heading, refList
                    continue
                elif bits[0] == 'xref':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    noteString = re.sub( r'@<ta(.+?)@>', r'', noteString ) # Get rid of these encoded BCV references for now
                    noteString = re.sub( r'@<to(.+?)@>', r'', noteString ) # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace( '@/', '' )
                    #print( repr(encodedNoteString), repr(noteString) )
                    assert( '@' not in noteString )
                    xrefDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                    continue
                elif bits[0] == 'footnote':
                    assert( len(bits) == 6 )
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[1:]
                    if Globals.debugFlag:
                        assert( bookNumberString.isdigit() )
                        assert( chapterNumberString.isdigit() )
                        assert( verseNumberString.isdigit() )
                        assert( indexNumberString.isdigit() )
                    BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumberString )
                    noteString = encodedNoteString.replace( '@9', '\\it ' ).replace( '@7', '\\it*' )
                    assert( '@' not in noteString )
                    footnoteDict[(BBB,chapterNumberString,verseNumberString,indexNumberString)] = noteString
                    continue
                else: print( "YETBible: Unknown line type", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits ); halt
            bookDict[lastBBB] = bookLines # Save the last book


                #if bookCode != lastBookCode: # We've started a new book
                    #if lastBookCode != -1: # Better save the last book
                        #self.saveBook( thisBook )
                    #BBB = Globals.BibleBooksCodes.getBBBFromYETBibleCode( bookCode )
                    #thisBook = BibleBook( self.name, BBB )
                    #thisBook.objectNameString = "YET Bible Book object"
                    #thisBook.objectTypeString = "YET"
                    #lastBookCode = bookCode
                    #lastChapterNumber = lastVerseNumber = -1

                #if chapterNumber != lastChapterNumber: # We've started a new chapter
                    #if Globals.debugFlag: assert( chapterNumber > lastChapterNumber or BBB=='ESG' ) # Esther Greek might be an exception
                    #if chapterNumber == 0:
                        #logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #thisBook.appendLine( 'c', chapterNumberString )
                    #lastChapterNumber = chapterNumber
                    #lastVerseNumber = -1

                ## Handle the verse info
                #if verseNumber==lastVerseNumber and vText==lastVText:
                    #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #continue
                #if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    ## Move Psalm titles to verse zero
                    #verseNumber = 0
                #if verseNumber < lastVerseNumber:
                    #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #elif verseNumber == lastVerseNumber:
                    #if vText == lastVText:
                        #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #else:
                        #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                #thisBook.appendLine( 'v', verseNumberString + ' ' + vText )
                #lastVText = vText
                #lastVerseNumber = verseNumber

        # Now process the books
        for BBB,bkData in bookDict.items():
            #print( "Processing", BBB )
            thisBook = BibleBook( self.name, BBB )
            thisBook.objectNameString = "YET Bible Book object"
            thisBook.objectTypeString = "YET"
            lastChapterNumberString = None
            for (chapterNumberString,verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                if (BBB,chapterNumberString,verseNumberString) in headingDict:
                    heading, refList = headingDict[(BBB,chapterNumberString,verseNumberString)]
                    #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                    thisBook.appendLine( 's', heading )
                    if refList:
                        refString = ""
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                        for ref in refList:
                            refString += ('; ' if refString else '') + ref
                        #print( 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) )
                        thisBook.appendLine( 'r', '('+refString+')' )
                # Insert footnotes and cross-references
                while( '\\ff' in verseString ):
                    #print( "footnote", repr(verseString) )
                    fIx = verseString.index( '\\ff' )
                    caller = verseString[fIx+3]
                    #print( "fcaller", repr(caller) )
                    assert( caller.isdigit() )
                    note = footnoteDict[(BBB,chapterNumberString,verseNumberString,caller)]
                    #print( "fnote", repr(note) )
                    verseString = verseString[:fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[fIx+4:]
                    #print( "fvS", repr(verseString) )
                while( '\\xx' in verseString ):
                    #print( "xref", repr(verseString) )
                    fIx = verseString.index( '\\xx' )
                    caller = verseString[fIx+3]
                    #print( "xcaller", repr(caller) )
                    assert( caller.isdigit() )
                    note = xrefDict[(BBB,chapterNumberString,verseNumberString,caller)]
                    #print( "xnote", repr(note) )
                    verseString = verseString[:fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[fIx+4:]
                    #print( "xvS", repr(verseString) )
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.appendLine( 'c', chapterNumberString )
                    lastChapterNumberString = chapterNumberString
                #print( BBB, chapterNumberString, verseNumberString, repr(verseString) )
                if verseString.startswith( '\\\\' ):  # It's an initial paragraph marker
                    if verseString[3]==' ': marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4]==' ': marker, verseString = verseString[2:4], verseString[5:]
                    else: halt
                    #print( '', '\\'+marker )
                    thisBook.appendLine( marker, '' )
                assert( not verseString.startswith( '\\\\' ) )
                bits = verseString.split( '\\\\' ) # Split on paragraph markers (but not character markers)
                for j,bit in enumerate(bits):
                    #print( "loop", j, repr(bit), repr(verseString) )
                    if j==0: thisBook.appendLine( 'v', verseNumberString + ' ' + verseString.rstrip() )
                    else:
                        if bit[1]==' ': marker, bit = bit[0], bit[2:]
                        elif bit[2]==' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        #print( "mV", marker, repr(bit), repr(verseString) )
                        thisBook.appendLine( marker, bit.rstrip() )
            self.saveBook( thisBook )
        self.doPostLoadProcessing()
예제 #18
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        status = 0 # 1 = getting chapters, 2 = getting verse data
        lastLine, lineCount = '', 0
        BBB = lastBBB = None
        bookDetails = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    logging.info( "      DrupalBible.load: Detected UTF-16 Byte Order Marker" )
                    line = line[1:] # Remove the UTF-8 Byte Order Marker
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines

                #print ( 'DB file line is "' + line + '"' )
                if line[0] == '#': continue # Just discard comment lines
                lastLine = line
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning( "Unknown DrupalBible first line: {}".format( repr(line) ) )

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else: # Get the version name details
                        bits = line.split( '|' )
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else: # Get the book name details
                        bits = line.split( '|' )
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert( bookShortName == bookCode )
                        BBBresult = Globals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                        BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                        bookDetails[BBB] = bookFullName, bookShortName, numChapters

                elif status == 2: # Get the verse text
                    bits = line.split( '|' )
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    #chapterNumber, verseNumber = int( chapterNumberString ), int( verseNumberString )
                    if lineMark: print( repr(lineMark) ); halt
                    BBBresult = Globals.BibleBooksCodes.getBBBFromDrupalBibleCode( bookCode )
                    BBB = BBBresult if isinstance( BBBresult, str ) else BBBresult[0] # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB:
                        if lastBBB is not None:
                            self.saveBook( thisBook )
                        thisBook = BibleBook( self.name, BBB )
                        thisBook.objectNameString = "DrupalBible Bible Book object"
                        thisBook.objectTypeString = "DrupalBible"
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.appendLine( 'c', chapterNumberString )
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace( '<', '\\it ' ).replace( '>', '\\it*' )
                    thisBook.appendLine( 'v', verseNumberString + ' ' + verseText )

                else: halt

        # Save the final book
        self.saveBook( thisBook )
        self.doPostLoadProcessing()
예제 #19
0
class GreekNT( Bible ):
    """
    Class for handling a Greek NT object (which may contain one or more Bible books)

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ):
        """
        Constructor: expects the filepath of the source folder.
        Loads (and crudely validates the file(s)) into ???.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = "Greek NT Bible object"
        self.objectTypeString = "GreekNT"

        # Now we can set our object variables
        self.sourceFilepath, self.givenName, self.encoding  = sourceFilepath, givenName, encoding

        self.title = self.version = self.date = None
        self.tree = self.header = self.frontMatter = self.divs = self.divTypesString = None
        #self.bkData, self.USFMBooks = OrderedDict(), OrderedDict()
        self.lang = self.language = None

        # Do a preliminary check on the readability of our files
        self.possibleFilenames = []
        if os.path.isdir( self.sourceFilepath ): # We've been given a folder -- see if we can find the files
            # There's no standard for OSIS xml file naming
            fileList = os.listdir( self.sourceFilepath )
            #print( len(fileList), fileList )
            # First try looking for OSIS book names
            for filename in fileList:
                if filename.lower().endswith('.txt'):
                    thisFilepath = os.path.join( self.sourceFilepath, filename )
                    #if Globals.debugFlag: print( "Trying {}...".format( thisFilepath ) )
                    if os.access( thisFilepath, os.R_OK ): # we can read that file
                        self.possibleFilenames.append( filename )
        elif not os.access( self.sourceFilepath, os.R_OK ):
            logging.critical( "GreekNT: File '{}' is unreadable".format( self.sourceFilepath ) )
            return # No use continuing
        #print( self.possibleFilenames ); halt

        self.name = self.givenName
        #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML
        #gNTfc.loadMorphGNT()
        #self.books = gNTfc.bookData
    # end of __init__


    #def x__str__( self ):
        #"""
        #This method returns the string representation of a Bible book code.

        #@return: the name of a Bible object formatted as a string
        #@rtype: string
        #"""
        #result = "Greek Bible converter object"
        ##if self.title: result += ('\n' if result else '') + self.title
        ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version )
        ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date )
        #if len(self.books)==1:
            #for BBB in self.books: break # Just get the first one
            #result += ('\n' if result else '') + "  " + _("Contains one book: {}").format( BBB )
        #else: result += ('\n' if result else '') + "  " + _("Number of books = {}").format( len(self.books) )
        #return result
    ## end of __str__


    def load( self ):
        if Globals.verbosityLevel > 2: print( "Loading Greek NT from {}...".format( self.sourceFilepath ) )
        for BBB in Greek.morphgntBooks:
            self.loadBook( BBB, Greek.morphgntFilenames[BBB] )
        if Globals.verbosityLevel > 3: print( "{} books loaded.".format( len(self.books) ) )
        #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book
            #for filename in self.possibleFilenames:
                #pathname = os.path.join( self.sourceFilepath, filename )
                #self.loadBook( pathname )
        #else: # most often we have all the Bible books in one file
            #self.loadFile( self.sourceFilepath )
        self.doPostLoadProcessing()
    # end of load


    def loadBook( self, BBB, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert( len(bits) == 7 )
            #print( bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #print( b, c, v )

            POSCode = bits[1]
            assert( len(POSCode) == 2 )
            assert( POSCode in Greek.POSCodes.keys() )

            parsingCode = bits[2]
            assert( len(parsingCode) == 8 )
            #print( parsingCode )
            for j,char in enumerate(parsingCode):
                assert( char in Greek.parsingCodes[j] )
            assert( parsingCode[0] in Greek.personCodes )
            assert( parsingCode[1] in Greek.tenseCodes )
            assert( parsingCode[2] in Greek.voiceCodes )
            assert( parsingCode[3] in Greek.modeCodes )
            assert( parsingCode[4] in Greek.caseCodes )
            assert( parsingCode[5] in Greek.numberCodes )
            assert( parsingCode[6] in Greek.genderCodes )
            assert( parsingCode[7] in Greek.degreeCodes )

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self.name, BBB )
        self.thisBook.objectNameString = "Morph Greek NT Bible Book object"
        self.thisBook.objectTypeString = "MorphGNT"
        filepath = os.path.join( self.sourceFilepath, filename )
        if Globals.verbosityLevel > 2: print( "  Loading {}...".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected UTF-16 Byte Order Marker in {}".format( filename ) )
                        line = line[1:] # Remove the UTF-8 Byte Order Marker
                    if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #print ( 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #print( unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.appendLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.appendLine( 'v', vn )
                        lastV = vn
                    self.thisBook.appendLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.appendLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #print( reference,bits[1],bits[2] ); halt
            if 0: #except:
                logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                if lineCount > 1: print( 'Previous line was: ', lastLine )
                else: print( 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            if Globals.verbosityLevel > 3: print( "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.saveBook( self.thisBook )
            #self.books[BBB] = self.thisBook
    # end of loadBook


    def xanalyzeWords( self ):
        """ Go through the NT data and do some filing and sorting of the Greek words. """
        if Globals.verbosityLevel > 3: print( "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) )
        self.wordCounts = {} # Wordcount organized by BBB
        self.wordCounts['Total'] = 0
        self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {}
        for BBB in self.books:
            wordCount = len(self.books[BBB])
            self.wordCounts[BBB] = wordCount
            self.wordCounts['Total'] += wordCount
            if Globals.verbosityLevel > 3: print( "  analyzeWords: {} has {} Greek words".format( BBB, wordCount ) )
            for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words
                # File the actual words
                if actualWord not in self.actualWordsToNormalized:
                    self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)]
                    #print( "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] )
                else: # we've already had this word before
                    previous = self.actualWordsToNormalized[actualWord]
                    #print( "had", actualWord, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        #print( "  Found a new", normalizedWord, "normalized word for", actualWord, "was", previous )
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.actualWordsToNormalized[actualWord] = newList
                        #print( "  now have", newList )
                # File the normalized words
                if normalizedWord not in self.normalizedWordsToActual:
                    self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToActual[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, actualWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldActualWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldActualWord )
                        if actualWord == oldActualWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldActualWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldActualWord,) )
                    if not found:
                        newList.append( ([reference],actualWord,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToActual[normalizedWord] = newList
                        #print( "  now have", newList )
                if normalizedWord not in self.normalizedWordsToParsing:
                    self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)]
                    #print( "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToParsing[normalizedWord]
                    #print( "had", normalizedWord, "before with", previous, "now with", reference, parsing )
                    found = changed = False
                    newList = []
                    for oldRefList,oldParsing in previous:
                        #print( "  oRL", oldRefList, "oP", oldParsing )
                        if parsing == oldParsing:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldParsing,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldParsing,) )
                    if not found:
                        newList.append( ([reference],parsing,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToParsing[normalizedWord] = newList
                        #print( "  now have", newList )
                # File the self.lemmasToNormalizedWords
                if lemma not in self.lemmasToNormalizedWords:
                    self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)]
                    #print( "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] )
                else: # we've already had this word before
                    previous = self.lemmasToNormalizedWords[lemma]
                    #print( "had", lemma, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #print( "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert( not found )
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.lemmasToNormalizedWords[lemma] = newList
                        #print( "  now have", newList )
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) )
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) )
        if Globals.verbosityLevel > 3:
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                print( "  ", aW, self.actualWordsToNormalized[aW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) )
        if Globals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToActual.keys() ):
                print( "  ", nW, self.normalizedWordsToActual[nW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) )
        if Globals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToParsing.keys() ):
                print( "  ", nW, self.normalizedWordsToParsing[nW] )
                if j==6: break
        if Globals.verbosityLevel > 2: print( "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) )
        if Globals.verbosityLevel > 3:
            for j,lem in enumerate( self.lemmasToNormalizedWords.keys() ):
                print( "  ", lem, self.lemmasToNormalizedWords[lem] )
                if j==6: break
        if 0:
            print( "The following actual words have multiple normalized forms:" )
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                if len(self.actualWordsToNormalized[aW])>1:
                    print( "  ", aW )
                    for entry in self.actualWordsToNormalized[aW]:
                        print( "    ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )
예제 #20
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BibleFilenameEndingsToAccept[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()

        # First get the settings
        cursor.execute('select * from Details')
        row = cursor.fetchone()
        for key in row.keys():
            self.settingsDict[key] = row[key]
        #print( self.settingsDict ); halt
        if 'Description' in self.settingsDict and len(
                self.settingsDict['Description']) < 40:
            self.name = self.settingsDict['Description']
        if 'Abbreviation' in self.settingsDict:
            self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.settingsDict:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, self.settingsDict['encryption']))

        if self.settingsDict['OT'] and self.settingsDict['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.settingsDict['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.settingsDict['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Create the first book
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "MySword Bible Book object"
        thisBook.objectTypeString = "MySword"

        verseList = BOS.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = Globals.BibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = cursor.fetchone()
                line = row[0]
            except:  # This reference is missing
                #print( "something wrong at", BBB, C, V )
                #if Globals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.settingsDict:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                            .format(BBB, C, V, repr(line)))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                            .format(BBB, C, V, repr(line), self.settingsDict))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    if '\r' in line or '\n' in line:
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        if Globals.verbosityLevel > 3:
                            print("Saving", BBB, bookCount + 1)
                        self.saveBook(thisBook)
                    #else: print( "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = BOS.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self.name, BBB)
                    thisBook.objectNameString = "MySword Bible Book object"
                    thisBook.objectTypeString = "MySword"
                    haveLines = False

                    verseList = BOS.getNumVersesList(BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = Globals.BibleBooksCodes.getReferenceNumber(BBB)
                    C = V = 1
                    #thisBook.appendLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.appendLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.appendLine('p', '')
                ourGlobals['haveParagraph'] = False
        cursor.close()
        self.doPostLoadProcessing()