Esempio n. 1
0
 def getContextVerseData( self, module, key ):
     """
     Fetch the entries for one verse from a Sword module, plus any context.

     Parameters:
         module: the Sword module to read from. When SwordType is "CrosswireLibrary"
             this code calls stripText( key ) and getName() on it; when SwordType is
             "OurCode" it calls getContextVerseData( key ) and getName().
         key: the verse key. This code calls getChapterNumberStr(), getVerseNumberStr(),
             getShortText(), getChapter() and getVerse() on it.

     Returns a 2-tuple (verseData, context):
         CrosswireLibrary: verseData is an InternalBibleEntryList containing a 'c#' entry
             (only when the verse number is '1'), a 'v' entry, and a 'v~' entry holding
             the right-stripped verse text; context is an empty list.
         OurCode: whatever module.getContextVerseData( key ) returns, except that
             a KeyError gives a blank verse (no 'v~' entry) with an empty context list,
             and a None result gives ( [], None ).
     Returns None (not a tuple) if the CrosswireLibrary text raises UnicodeDecodeError.

     Raises RuntimeError if SwordType is neither "CrosswireLibrary" nor "OurCode"
         (previously this fell through to an unbound local variable).
     """
     if SwordType == "CrosswireLibrary":
         try: verseText = module.stripText( key )
         except UnicodeDecodeError:
             print( "Can't decode utf-8 text of {} {}".format( module.getName(), key.getShortText() ) )
             return None
         if BibleOrgSysGlobals.debugFlag:
             if '\n' in verseText or '\r' in verseText:
                 print( t("getVerseData: Why does it have CR or LF in {} {} {}")
                         .format( module.getName(), key.getShortText(), repr(verseText) ) )
         verseText = verseText.rstrip()
         verseData = InternalBibleEntryList()
         c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
         # Prepend the chapter/verse numbers since Sword modules don't contain that info in the data
         if v=='1': verseData.append( InternalBibleEntry( 'c#','c', c, c, None, c ) )
         verseData.append( InternalBibleEntry( 'v','v', v, v, None, v ) )
         verseData.append( InternalBibleEntry( 'v~','v~', verseText, verseText, None, verseText ) )
         return verseData, [] # No context

     if SwordType == "OurCode":
         try: contextVerseData = module.getContextVerseData( key )
         except KeyError: # Just create a blank verse entry
             verseData = InternalBibleEntryList()
             c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
             if v=='1': verseData.append( InternalBibleEntry( 'c#','c', c, c, None, c ) )
             verseData.append( InternalBibleEntry( 'v','v', v, v, None, v ) )
             contextVerseData = verseData, [] # No context
         if contextVerseData is None:
             if key.getChapter()!=0 or key.getVerse()!=0: # We're not surprised if there's no chapter or verse zero
                 print( t("SwordInterface.getVerseData no VD"), module.getName(), key, contextVerseData )
             return [], None
         # Sanity-check the shape of what we're about to hand back
         verseData, _context = contextVerseData
         assert isinstance( verseData, InternalBibleEntryList )
         assert 1 <= len(verseData) <= 6
         return contextVerseData

     # Neither backend is selected -- fail with a clear message rather than an UnboundLocalError
     raise RuntimeError( "getContextVerseData: unsupported SwordType {!r}".format( SwordType ) )
Esempio n. 2
0
    def load(self, folder):
        """
        Load the BCV Bible book from a folder.

        The book is read from the sub-folder <folder>/<BBB>/. The metadata file
            <BBB>__BookMetadata.txt is processed first (by loadBookMetadata), and
            then one file is read for each entry in self.givenCVList:
            <BBB>_C<C>V<V>.txt for a (C, V) 2-tuple, or <BBB>__Intro.txt for ('-1',).

        Every line in those files must start with a backslash, followed by the marker,
            then optionally '<<' plus the original marker, then optionally '=' plus the text.
        If no original marker is given, it defaults to 'v' for 'v~', 'c' for 'c#',
            and otherwise to the marker itself.
        Markers starting with '¬' must have neither an original marker nor any text.
            All other lines are passed through processLineFix.

        Each line becomes an InternalBibleEntry appended to self._processedLines.
            Afterwards any fix errors are saved in self.errorDictionary,
            self._processedFlag is set, and makeCVIndex is called.

        Note: a nested doaddLine helper used to be defined here, but it was never called
            (and referenced an undefined loadErrors list), so it has been removed.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + _("Loading {} from {}…").format(self.BBB, folder))
        self.sourceFolder = os.path.join(folder, self.BBB + '/')

        # Read book metadata (we rely on this to give us self.givenCVList)
        self.loadBookMetadata(
            os.path.join(self.sourceFolder, self.BBB + '__BookMetadata.txt'))

        fixErrors = []
        self._processedLines = InternalBibleEntryList(
        )  # Contains more-processed tuples which contain the actual Bible text

        DUMMY_VALUE = 999999  # Some number bigger than the number of characters in a line
        for CV in self.givenCVList:
            if isinstance(CV, tuple) and len(CV) == 2:
                C, V = CV
                filename = self.BBB + '_C' + C + 'V' + V + '.txt'
            else:
                assert CV == ('-1', )
                C, V = '-1', '0'  # Two separate strings (not the same tuple in both)
                filename = self.BBB + '__Intro.txt'
            filepath = os.path.join(self.sourceFolder, filename)
            with open(filepath, 'rt', encoding='utf-8'
                      ) as myFile:  # Automatically closes the file when done
                for lineCount, line in enumerate(myFile, start=1):
                    if lineCount == 1 and line and line[0] == chr(
                            65279):  #U+FEFF
                        # Report the file actually being read
                        logging.info(
                            exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}"
                                ).format(filepath))
                        line = line[1:]  # Remove the Byte Order Marker (BOM)
                    if line and line[-1] == '\n':
                        line = line[:-1]  # Remove trailing newline character
                    assert line and line[0] == '\\'

                    # Find where the marker ends: at '<<' or '=', whichever comes first
                    ixEQ = line.find('=')
                    ixLL = line.find('<<')
                    if ixEQ == -1: ixEQ = DUMMY_VALUE
                    if ixLL == -1: ixLL = DUMMY_VALUE
                    marker = line[1:min(ixEQ, ixLL)]

                    if ixLL == DUMMY_VALUE:
                        originalMarker = None
                        if marker == 'v~': originalMarker = 'v'
                        elif marker == 'c#': originalMarker = 'c'
                    else: originalMarker = line[ixLL + 2:ixEQ]

                    if ixEQ == DUMMY_VALUE: text = None
                    else: text = line[ixEQ + 1:]

                    if marker[0] == '¬':
                        assert originalMarker is None and text is None
                        # cleanText must be reset too, else it's unbound or left over from the previous line
                        adjText = cleanText = extras = None
                    else:
                        if originalMarker is None: originalMarker = marker
                        if text is None: text = ''
                        adjText, cleanText, extras = self.processLineFix(
                            C, V, originalMarker, text, fixErrors
                        )  # separate out the notes (footnotes and cross-references)
                    self._processedLines.append(
                        InternalBibleEntry(marker, originalMarker, adjText,
                                           cleanText, extras, text))

        if fixErrors: self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()
Esempio n. 3
0
class BCVBibleBook(BibleBook):
    """
    Class to load and manipulate a single BCV file / book.
    """
    def __init__(self, containerBibleObject, BBB):
        """
        Create the BCV Bible book object.
        """
        BibleBook.__init__(self, containerBibleObject,
                           BBB)  # Initialise the base class
        self.objectNameString = 'BCV Bible Book object'
        self.objectTypeString = 'BCV'

    # end of BCVBibleBook.__init__

    def loadBookMetadata(self, metadataFilepath):
        """
        Process the metadata from the given filepath.

        The file is read as UTF-8 and is expected to contain lines like:
            BCVVersion = 1.0
            WorkName = Matigsalug
            CVList = [('1', '1'), ('1', '2'), ('1', '3'), …
        Blank lines are ignored; any other line is reported as an error on stdout.

        Sets self.metadataFilepath, self.givenCVList (None unless a valid CVList
            list was found) and self.workName (only if WorkName was given).
        Any entries that are left over (e.g., a CVList that couldn't be parsed)
            are saved in self.settingsDict (which is only set if there are any).
        """
        import ast  # Only needed here, for parsing the CVList literal safely

        if BibleOrgSysGlobals.debugFlag and BibleOrgSysGlobals.verbosityLevel > 2:
            print('  ' + exp("Loading {} metadata from {!r}…").format(
                self.BBB, metadataFilepath))
        self.metadataFilepath = metadataFilepath
        self.givenCVList = None
        settingsDict = {}
        # Explicit UTF-8 (as in load) so that the BOM check below doesn't depend on the platform default
        with open(metadataFilepath, 'rt', encoding='utf-8'
                  ) as myFile:  # Automatically closes the file when done
            for lineCount, line in enumerate(myFile, start=1):
                if lineCount == 1 and line and line[0] == chr(65279):  #U+FEFF
                    logging.info(
                        exp("loadBookMetadata: Detected Unicode Byte Order Marker (BOM) in {}"
                            ).format(metadataFilepath))
                    line = line[1:]  # Remove the Byte Order Marker (BOM)
                line = line.strip()  # Remove the newline plus leading and trailing whitespace
                if not line: continue  # Just discard blank lines
                for fieldName in (
                        'BCVVersion',
                        'WorkName',
                        'CVList',
                ):
                    if line.startswith(fieldName + ' = '):
                        settingsDict[fieldName] = line[len(fieldName) + 3:]
                        break
                else:  # no known field name matched
                    print(
                        exp("ERROR: Unexpected {!r} line in metadata file").
                        format(line))
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " +
                  exp("Got {} metadata entries:").format(len(settingsDict)))
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print("    {}: {}".format(key, settingsDict[key]))

        if 'BCVVersion' in settingsDict:
            # This used to be a bare comparison with no effect -- now we at least report a mismatch
            if settingsDict['BCVVersion'] != '1.0':
                logging.warning(
                    exp("loadBookMetadata: Unexpected BCVVersion {!r} in {}").
                    format(settingsDict['BCVVersion'], metadataFilepath))
            del settingsDict['BCVVersion']
        if 'WorkName' in settingsDict:
            self.workName = settingsDict['WorkName']
            del settingsDict['WorkName']
        if 'CVList' in settingsDict:
            CVL = settingsDict['CVList']
            if CVL and CVL[0] == '[' and CVL[-1] == ']':
                # SECURITY: this text comes straight from a file on disk and used to be
                #   passed to eval(), which would run any expression found there.
                #   ast.literal_eval only accepts Python literals (which is all we need).
                try:
                    self.givenCVList = ast.literal_eval(CVL)
                except (ValueError, TypeError, SyntaxError):
                    self.givenCVList = None  # Gets reported just below
            if isinstance(self.givenCVList, list): del settingsDict['CVList']
            else:
                print(
                    exp("ERROR: Unexpected {!r} format in metadata file").
                    format(CVL))

        if settingsDict:
            self.settingsDict = settingsDict
            print('book SD', self.settingsDict)

    # end of BCVBibleBook.loadBookMetadata

    def load(self, folder):
        """
        Load the BCV Bible book from a folder.

        The book is read from the sub-folder <folder>/<BBB>/. The metadata file
            <BBB>__BookMetadata.txt is processed first (by loadBookMetadata), and
            then one file is read for each entry in self.givenCVList:
            <BBB>_C<C>V<V>.txt for a (C, V) 2-tuple, or <BBB>__Intro.txt for ('-1',).

        Every line in those files must start with a backslash, followed by the marker,
            then optionally '<<' plus the original marker, then optionally '=' plus the text.
        If no original marker is given, it defaults to 'v' for 'v~', 'c' for 'c#',
            and otherwise to the marker itself.
        Markers starting with '¬' must have neither an original marker nor any text.
            All other lines are passed through processLineFix.

        Each line becomes an InternalBibleEntry appended to self._processedLines.
            Afterwards any fix errors are saved in self.errorDictionary,
            self._processedFlag is set, and makeCVIndex is called.

        Note: a nested doaddLine helper used to be defined here, but it was never called
            (and referenced an undefined loadErrors list), so it has been removed.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + _("Loading {} from {}…").format(self.BBB, folder))
        self.sourceFolder = os.path.join(folder, self.BBB + '/')

        # Read book metadata (this gives us self.givenCVList)
        self.loadBookMetadata(
            os.path.join(self.sourceFolder, self.BBB + '__BookMetadata.txt'))

        fixErrors = []
        self._processedLines = InternalBibleEntryList(
        )  # Contains more-processed tuples which contain the actual Bible text

        DUMMY_VALUE = 999999  # Some number bigger than the number of characters in a line
        for CV in self.givenCVList:
            if isinstance(CV, tuple) and len(CV) == 2:
                C, V = CV
                filename = self.BBB + '_C' + C + 'V' + V + '.txt'
            else:
                assert CV == ('-1', )
                C, V = '-1', '0'  # Two separate strings (not the same tuple in both)
                filename = self.BBB + '__Intro.txt'
            filepath = os.path.join(self.sourceFolder, filename)
            with open(filepath, 'rt', encoding='utf-8'
                      ) as myFile:  # Automatically closes the file when done
                for lineCount, line in enumerate(myFile, start=1):
                    if lineCount == 1 and line and line[0] == chr(
                            65279):  #U+FEFF
                        # Report the file actually being read
                        logging.info(
                            exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}"
                                ).format(filepath))
                        line = line[1:]  # Remove the Byte Order Marker (BOM)
                    if line and line[-1] == '\n':
                        line = line[:-1]  # Remove trailing newline character
                    assert line and line[0] == '\\'

                    # Find where the marker ends: at '<<' or '=', whichever comes first
                    ixEQ = line.find('=')
                    ixLL = line.find('<<')
                    if ixEQ == -1: ixEQ = DUMMY_VALUE
                    if ixLL == -1: ixLL = DUMMY_VALUE
                    marker = line[1:min(ixEQ, ixLL)]

                    if ixLL == DUMMY_VALUE:
                        originalMarker = None
                        if marker == 'v~': originalMarker = 'v'
                        elif marker == 'c#': originalMarker = 'c'
                    else: originalMarker = line[ixLL + 2:ixEQ]

                    if ixEQ == DUMMY_VALUE: text = None
                    else: text = line[ixEQ + 1:]

                    if marker[0] == '¬':
                        assert originalMarker is None and text is None
                        # cleanText must be reset too, else it's unbound or left over from the previous line
                        adjText = cleanText = extras = None
                    else:
                        if originalMarker is None: originalMarker = marker
                        if text is None: text = ''
                        adjText, cleanText, extras = self.processLineFix(
                            C, V, originalMarker, text, fixErrors
                        )  # separate out the notes (footnotes and cross-references)
                    self._processedLines.append(
                        InternalBibleEntry(marker, originalMarker, adjText,
                                           cleanText, extras, text))

        if fixErrors: self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()

    # end of BCVBibleBook.load
Esempio n. 4
0
 def getContextVerseData(self, module, key):
     """
     Fetch the entries for one verse from a Sword module, plus any context.

     Parameters:
         module: the Sword module to read from. When SwordType is "CrosswireLibrary"
             this code calls stripText(key) and getName() on it; when SwordType is
             "OurCode" it calls getContextVerseData(key) and getName().
         key: the verse key. This code calls getChapterNumberStr(), getVerseNumberStr(),
             getShortText(), getChapter() and getVerse() on it.

     Returns a 2-tuple (verseData, context):
         CrosswireLibrary: verseData is an InternalBibleEntryList containing a 'c#' entry
             (only when the verse number is '1'), a 'v' entry, and a 'v~' entry holding
             the right-stripped verse text; context is an empty list.
         OurCode: whatever module.getContextVerseData(key) returns, except that
             a KeyError gives a blank verse (no 'v~' entry) with an empty context list,
             and a None result gives ([], None).
     Returns None (not a tuple) if the CrosswireLibrary text raises UnicodeDecodeError.

     Raises RuntimeError if SwordType is neither "CrosswireLibrary" nor "OurCode"
         (previously this fell through to an unbound local variable).
     """
     if SwordType == "CrosswireLibrary":
         try:
             verseText = module.stripText(key)
         except UnicodeDecodeError:
             print("Can't decode utf-8 text of {} {}".format(
                 module.getName(), key.getShortText()))
             return None
         if BibleOrgSysGlobals.debugFlag:
             if '\n' in verseText or '\r' in verseText:
                 print(
                     t("getVerseData: Why does it have CR or LF in {} {} {}").
                     format(module.getName(), key.getShortText(),
                            repr(verseText)))
         verseText = verseText.rstrip()
         verseData = InternalBibleEntryList()
         c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
         # Prepend the chapter/verse numbers since Sword modules don't contain that info in the data
         if v == '1':
             verseData.append(InternalBibleEntry('c#', 'c', c, c, None, c))
         verseData.append(InternalBibleEntry('v', 'v', v, v, None, v))
         verseData.append(
             InternalBibleEntry('v~', 'v~', verseText, verseText, None,
                                verseText))
         return verseData, []  # No context

     if SwordType == "OurCode":
         try:
             contextVerseData = module.getContextVerseData(key)
         except KeyError:  # Just create a blank verse entry
             verseData = InternalBibleEntryList()
             c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
             if v == '1':
                 verseData.append(
                     InternalBibleEntry('c#', 'c', c, c, None, c))
             verseData.append(InternalBibleEntry('v', 'v', v, v, None, v))
             contextVerseData = verseData, []  # No context
         if contextVerseData is None:
             # We're not surprised if there's no chapter or verse zero
             if key.getChapter() != 0 or key.getVerse() != 0:
                 print(t("SwordInterface.getVerseData no VD"),
                       module.getName(), key, contextVerseData)
             return [], None
         # Sanity-check the shape of what we're about to hand back
         verseData, _context = contextVerseData
         assert isinstance(verseData, InternalBibleEntryList)
         assert 1 <= len(verseData) <= 6
         return contextVerseData

     # Neither backend is selected -- fail with a clear message rather than an UnboundLocalError
     raise RuntimeError(
         "getContextVerseData: unsupported SwordType {!r}".format(SwordType))
Esempio n. 5
0
    def load( self, folder ):
        """
        Load the BCV Bible book from a folder.

        The book is read from the sub-folder <folder>/<BBB>/. The metadata file
            <BBB>__BookMetadata.txt is processed first (by loadBookMetadata), and
            then one file is read for each entry in self.givenCVList:
            <BBB>_C<C>V<V>.txt for a (C, V) 2-tuple, or <BBB>__Intro.txt for ('-1',).

        Every line in those files must start with a backslash, followed by the marker,
            then optionally '<<' plus the original marker, then optionally '=' plus the text.
        If no original marker is given, it defaults to 'v' for 'v~', 'c' for 'c#',
            and otherwise to the marker itself.
        Markers starting with '¬' must have neither an original marker nor any text.
            All other lines are passed through processLineFix.

        Each line becomes an InternalBibleEntry appended to self._processedLines.
            Afterwards any fix errors are saved in self.errorDictionary,
            self._processedFlag is set, and makeCVIndex is called.

        Note: a nested doaddLine helper used to be defined here, but it was never called
            (and referenced an undefined loadErrors list), so it has been removed.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {} from {}…").format( self.BBB, folder ) )
        self.sourceFolder = os.path.join( folder, self.BBB+'/' )

        # Read book metadata (we rely on this to give us self.givenCVList)
        self.loadBookMetadata( os.path.join( self.sourceFolder, self.BBB+'__BookMetadata.txt' ) )

        fixErrors = []
        self._processedLines = InternalBibleEntryList() # Contains more-processed tuples which contain the actual Bible text

        DUMMY_VALUE = 999999 # Some number bigger than the number of characters in a line
        for CV in self.givenCVList:
            if isinstance( CV, tuple) and len(CV)==2:
                C, V = CV
                filename = self.BBB+'_C'+C+'V'+V+'.txt'
            else:
                assert CV == ('-1',)
                C, V = '-1', '0' # Two separate strings (not the same tuple in both)
                filename = self.BBB+'__Intro.txt'
            filepath = os.path.join( self.sourceFolder, filename )
            with open( filepath, 'rt', encoding='utf-8' ) as myFile: # Automatically closes the file when done
                for lineCount, line in enumerate( myFile, start=1 ):
                    if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
                        # Report the file actually being read
                        logging.info( exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}").format( filepath ) )
                        line = line[1:] # Remove the Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
                    assert line and line[0]=='\\'

                    # Find where the marker ends: at '<<' or '=', whichever comes first
                    ixEQ = line.find( '=' )
                    ixLL = line.find( '<<' )
                    if ixEQ == -1: ixEQ = DUMMY_VALUE
                    if ixLL == -1: ixLL = DUMMY_VALUE
                    marker = line[1:min( ixEQ, ixLL )]

                    if ixLL == DUMMY_VALUE:
                        originalMarker = None
                        if marker == 'v~': originalMarker = 'v'
                        elif marker == 'c#': originalMarker = 'c'
                    else: originalMarker = line[ixLL+2:ixEQ]

                    if ixEQ == DUMMY_VALUE: text = None
                    else: text = line[ixEQ+1:]

                    if marker[0] == '¬':
                        assert originalMarker is None and text is None
                        # cleanText must be reset too, else it's unbound or left over from the previous line
                        adjText = cleanText = extras = None
                    else:
                        if originalMarker is None: originalMarker = marker
                        if text is None: text = ''
                        adjText, cleanText, extras = self.processLineFix( C, V, originalMarker, text, fixErrors ) # separate out the notes (footnotes and cross-references)
                    self._processedLines.append( InternalBibleEntry(marker, originalMarker, adjText, cleanText, extras, text) )

        if fixErrors: self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()
Esempio n. 6
0
class BCVBibleBook( BibleBook ):
    """
    Class to load and manipulate a single BCV file / book.

    On disk, a BCV book is a folder (named with the book code) which contains
        a metadata file (BBB__BookMetadata.txt),
        one text file per chapter/verse (BBB_C<chapter>V<verse>.txt),
        and possibly an introduction file (BBB__Intro.txt).
    """

    def __init__( self, containerBibleObject, BBB ):
        """
        Create the BCV Bible book object.
        """
        BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
        self.objectNameString = 'BCV Bible Book object'
        self.objectTypeString = 'BCV'
    # end of BCVBibleBook.__init__


    def loadBookMetadata( self, metadataFilepath ):
        """
        Process the metadata from the given filepath.

        The file is read as UTF-8 text made up of "FieldName = value" lines, e.g.,
            BCVVersion = 1.0
            WorkName = Matigsalug
            CVList = [('1', '1'), ('1', '2'), ('1', '3'), ('1', '4'), ('1', '5'), …
        Blank lines are discarded and any other line is reported as an error.

        Sets self.metadataFilepath, self.givenCVList (left as None if there's
            no usable CVList field) and, if the field is present, self.workName.
        Any fields which remain after that are put into self.settingsDict.
        """
        import ast # Local import -- only needed here for parsing the CVList literal

        if BibleOrgSysGlobals.debugFlag and BibleOrgSysGlobals.verbosityLevel > 2:
            print( '  ' + exp("Loading {} metadata from {!r}…").format( self.BBB, metadataFilepath ) )
        self.metadataFilepath = metadataFilepath
        self.givenCVList = None
        knownFieldNames = ('BCVVersion','WorkName','CVList',)
        settingsDict = {}
        # Use the same explicit encoding as for the verse files (rather than the platform default)
        with open( metadataFilepath, 'rt', encoding='utf-8' ) as myFile: # Automatically closes the file when done
            for lineNumber, line in enumerate( myFile, start=1 ):
                if lineNumber==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( exp("loadBookMetadata: Detected Unicode Byte Order Marker (BOM) in {}").format( metadataFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker (BOM)
                line = line.strip() # Remove leading and trailing whitespace (including the newline character)
                if not line: continue # Just discard blank lines
                for fieldName in knownFieldNames:
                    prefix = fieldName + ' = '
                    if line.startswith( prefix ):
                        settingsDict[fieldName] = line[len(prefix):]
                        break
                else: # We didn't recognise the start of the line
                    print( exp("ERROR: Unexpected {!r} line in metadata file").format( line ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  " + exp("Got {} metadata entries:").format( len(settingsDict) ) )
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print( "    {}: {}".format( key, settingsDict[key] ) )

        if 'BCVVersion' in settingsDict:
            BCVVersion = settingsDict.pop( 'BCVVersion' )
            if BCVVersion != '1.0': # This is the only version that this loader was written for
                logging.warning( exp("loadBookMetadata: Unexpected BCV version {!r} in {}").format( BCVVersion, metadataFilepath ) )
        if 'WorkName' in settingsDict: self.workName = settingsDict.pop( 'WorkName' )
        if 'CVList' in settingsDict:
            CVL = settingsDict['CVList']
            if CVL and CVL[0]=='[' and CVL[-1]==']':
                # NOTE: This used to be eval() which would execute any code placed in the file
                #   literal_eval only accepts Python literals (which is all a CVList should contain)
                try: self.givenCVList = ast.literal_eval( CVL )
                except (ValueError, SyntaxError): self.givenCVList = None # Gets reported just below
            if isinstance( self.givenCVList, list ): del settingsDict['CVList']
            else: print( exp("ERROR: Unexpected {!r} format in metadata file").format( CVL ) )

        if settingsDict:
            self.settingsDict = settingsDict
            print( 'book SD', self.settingsDict )
    # end of BCVBibleBook.loadBookMetadata


    @staticmethod
    def _parseBCVLine( line ):
        """
        Split one line from a BCV verse file into its parts.

        The line (which must start with a backslash) has one of these forms:
            \\marker
            \\marker=text
            \\marker<<originalMarker
            \\marker<<originalMarker=text

        Returns a 3-tuple ( marker, originalMarker, text ) where
            originalMarker is None if it wasn't given
                (except that it defaults to 'v' for a 'v~' marker and to 'c' for a 'c#' marker)
            and text is None if there's no equals sign in the line.

        Only a << before the first equals sign is treated as a separator
            -- anything after the equals sign is part of the text.
        """
        markerPart, equalsSign, text = line.partition( '=' )
        if not equalsSign: text = None
        marker, arrows, originalMarker = markerPart[1:].partition( '<<' ) # The [1:] drops the backslash
        if not arrows: originalMarker = {'v~':'v', 'c#':'c'}.get( marker )
        return marker, originalMarker, text
    # end of BCVBibleBook._parseBCVLine


    def load( self, folder ):
        """
        Load the BCV Bible book from a folder.

        The book files are expected in a subfolder (named with the book code)
            of the given folder.

        Reads the book metadata first (which gives the list of chapter/verse files),
            then reads every line of each of those files
            and appends an InternalBibleEntry for each one to self._processedLines.
        A ('-1',) entry in the list refers to the introduction file.

        Any problems reported by processLineFix are saved in
            self.errorDictionary['Fix Text Errors'].
        Finally the book is flagged as processed and makeCVIndex() is called.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {} from {}…").format( self.BBB, folder ) )
        self.sourceFolder = os.path.join( folder, self.BBB+'/' )

        # Read book metadata
        self.loadBookMetadata( os.path.join( self.sourceFolder, self.BBB+'__BookMetadata.txt' ) )

        fixErrors = []
        self._processedLines = InternalBibleEntryList() # Contains more-processed tuples which contain the actual Bible text

        for CV in self.givenCVList:
            if isinstance( CV, tuple ) and len(CV)==2:
                C, V = CV
                filename = self.BBB+'_C'+C+'V'+V+'.txt'
            else:
                assert CV == ('-1',)
                C, V = '-1', '0' # Must unpack here (C = V = … would set both of them to the tuple)
                filename = self.BBB+'__Intro.txt'
            filepath = os.path.join( self.sourceFolder, filename )
            with open( filepath, 'rt', encoding='utf-8' ) as myFile: # Automatically closes the file when done
                for lineNumber, line in enumerate( myFile, start=1 ):
                    if lineNumber==1 and line and line[0]==chr(65279): #U+FEFF
                        logging.info( exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}").format( filepath ) )
                        line = line[1:] # Remove the Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
                    assert line and line[0]=='\\'
                    marker, originalMarker, text = self._parseBCVLine( line )

                    if marker[0] == '¬': # These markers have no original marker or text
                        assert originalMarker is None and text is None
                        adjText = cleanText = extras = None # cleanText must be set here also (it's used below)
                    else:
                        if originalMarker is None: originalMarker = marker
                        if text is None: text = ''
                        adjText, cleanText, extras = self.processLineFix( C, V, originalMarker, text, fixErrors ) # separate out the notes (footnotes and cross-references)
                    self._processedLines.append( InternalBibleEntry(marker, originalMarker, adjText, cleanText, extras, text) )

        if fixErrors: self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()
    # end of BCVBibleBook.load