Ejemplo n.º 1
0
    def loadSemanticDictionary(self, BBB, filename):
        """
        Load the entries of a possible ESFM semantic dictionary file.

        The file is read (with ESFMFile) from self.sourceFolder and scanned
        line by line:
            * a 'rem' line starting with 'ESFM ' must mention ' SEM';
              if it doesn't, the function returns straight away (and BBB
              is NOT added to self.dontLoadBook)
            * a 'gl' line of the form "<tag> <content>", where <tag> is one
              of ESFM_SEMANTIC_TAGS, registers an empty list under
              self.semanticDict[tag][content] (if not already there).

        Parameters:
            BBB: book code, appended to self.dontLoadBook once the file
                has been scanned
            filename: name of the file inside self.sourceFolder

        Returns:
            None
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("    " +
                  _("Loading possible semantic dictionary from {}...").format(
                      filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        for marker, originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                # Header line says that this isn't a semantic dictionary
                if ' SEM' not in originalText: return
            elif marker == 'gl':
                # The length must be checked BEFORE indexing, otherwise an
                #   empty or one-character line raises an IndexError
                if len(originalText) > 2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict:
                        self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]:
                        self.semanticDict[tagMarker][tagContent] = []
                    count += 1  # Counts every matching line (even repeats)
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} semantic entries added in {} categories".format(
                    count, len(self.semanticDict)))
            else:
                print("No semantic entries found.")
Ejemplo n.º 2
0
    def loadStrongsDictionary(self, BBB, filename):
        """
        Load the entries of a possible ESFM Strong's dictionary file.

        The file is read (with ESFMFile) from self.sourceFolder and scanned
        line by line:
            * a 'rem' line starting with 'ESFM ' must mention ' STR';
              if it doesn't, the function returns straight away (and BBB
              is NOT added to self.dontLoadBook)
            * a 'gl' line starting with 'H' or 'G' sets the current entry
              (first character = tag marker, remainder = number)
            * an 'html' line is stored as the dictionary entry for the
              current tag marker and number (the first one found wins).

        Parameters:
            BBB: book code, appended to self.dontLoadBook once the file
                has been scanned
            filename: name of the file inside self.sourceFolder

        Returns:
            None
        """
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("    " +
                  _("Loading possible Strong's dictionary from {}...").format(
                      filename))
        sourceFilepath = os.path.join(self.sourceFolder, filename)
        originalBook = ESFMFile()
        originalBook.read(sourceFilepath)

        count = 0
        # No current entry until the first valid 'gl' line has been seen
        tagMarker = sNumber = None
        for marker, originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                # Header line says that this isn't a Strong's dictionary
                if ' STR' not in originalText: return
            elif marker == 'gl':
                # Guard against an empty line before indexing
                if originalText and originalText[0] in 'HG':
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None:
                    # An html line before any H/G 'gl' line has nothing to
                    #   attach to (this used to be an UnboundLocalError)
                    continue
                dictEntry = originalText
                if tagMarker not in self.StrongsDict:
                    self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]:
                    self.StrongsDict[tagMarker][sNumber] = dictEntry
                count += 1
        self.dontLoadBook.append(BBB)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count:
                print("{} Strong's entries added in {} categories".format(
                    count, len(self.StrongsDict)))
            else:
                print("No Strong's entries found.")
Ejemplo n.º 3
0
    def loadStrongsDictionary( self, BBB, filename ):
        """
        Load the entries of a possible ESFM Strong's dictionary file.

        The file is read (with ESFMFile) from self.sourceFolder and scanned line by line:
            * a 'rem' line starting with 'ESFM ' must mention ' STR' --
                if it doesn't, the function returns straight away (and BBB is NOT added to self.dontLoadBook)
            * a 'gl' line starting with 'H' or 'G' sets the current entry
                (first character = tag marker, remainder = number)
            * an 'html' line is stored as the dictionary entry for the current tag marker and number
                (the first one found wins).

        Parameters:
            BBB: book code, appended to self.dontLoadBook once the file has been scanned
            filename: name of the file inside self.sourceFolder

        Returns:
            None
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "    " + _("Loading possible Strong's dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        tagMarker = sNumber = None # No current entry until the first valid 'gl' line has been seen
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' STR' not in originalText: return # Header line says that this isn't a Strong's dictionary
            elif marker == 'gl':
                if originalText and originalText[0] in 'HG': # Guard against an empty line before indexing
                    tagMarker = originalText[0]
                    sNumber = originalText[1:]
            elif marker == 'html':
                if tagMarker is None: continue # Nothing to attach this entry to (this used to be an UnboundLocalError)
                dictEntry = originalText
                if tagMarker not in self.StrongsDict: self.StrongsDict[tagMarker] = {}
                if sNumber not in self.StrongsDict[tagMarker]: self.StrongsDict[tagMarker][sNumber] = dictEntry
                count += 1
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} Strong's entries added in {} categories".format( count, len(self.StrongsDict) ) )
            else: print( "No Strong's entries found." )
Ejemplo n.º 4
0
    def loadSemanticDictionary( self, BBB, filename ):
        """
        Load the entries of a possible ESFM semantic dictionary file.

        The file is read (with ESFMFile) from self.sourceFolder and scanned line by line:
            * a 'rem' line starting with 'ESFM ' must mention ' SEM' --
                if it doesn't, the function returns straight away (and BBB is NOT added to self.dontLoadBook)
            * a 'gl' line of the form "<tag> <content>", where <tag> is one of ESFM_SEMANTIC_TAGS,
                registers an empty list under self.semanticDict[tag][content] (if not already there).

        Parameters:
            BBB: book code, appended to self.dontLoadBook once the file has been scanned
            filename: name of the file inside self.sourceFolder

        Returns:
            None
        """
        if BibleOrgSysGlobals.verbosityLevel > 1: print( "    " + _("Loading possible semantic dictionary from {}...").format( filename ) )
        sourceFilepath = os.path.join( self.sourceFolder, filename )
        originalBook = ESFMFile()
        originalBook.read( sourceFilepath )

        count = 0
        for marker,originalText in originalBook.lines:
            if marker == 'rem' and originalText.startswith('ESFM '):
                if ' SEM' not in originalText: return # Header line says that this isn't a semantic dictionary
            elif marker == 'gl':
                # The length must be checked BEFORE indexing, otherwise an empty or one-character line raises an IndexError
                if len(originalText)>2 \
                and originalText[0] in ESFM_SEMANTIC_TAGS \
                and originalText[1] == ' ':
                    tagMarker = originalText[0]
                    tagContent = originalText[2:]
                    if tagMarker not in self.semanticDict: self.semanticDict[tagMarker] = {}
                    if tagContent not in self.semanticDict[tagMarker]: self.semanticDict[tagMarker][tagContent] = []
                    count += 1 # Counts every matching line (even repeats)
        self.dontLoadBook.append( BBB )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            if count: print( "{} semantic entries added in {} categories".format( count, len(self.semanticDict) ) )
            else: print( "No semantic entries found." )
Ejemplo n.º 5
0
    def load( self, filename, folder=None ):
        """
        Load the ESFM Bible book from a file.

        Tries to combine physical lines into logical lines,
            i.e., so that all lines begin with a ESFM paragraph marker.

        Each line is first run through ESFMPreprocessing (to convert the ESFM tags
            into pseudo-USFM character fields), and is then saved (one line behind,
            in case following lines have to be appended to it) via doaddLine,
            which uses the addLine function of the base class.

        Parameters:
            filename: name of the ESFM file
            folder: optional folder containing the file
                (if not given, filename is used as the complete path)

        Side effects:
            Sets self.sourceFilename, self.sourceFolder and self.sourceFilepath.
            Any load errors are saved in self.errorDictionary['Load Errors'].
            Tag occurrences are recorded in the semanticDict and StrongsDict
                of self.containerBibleObject.

        Returns:
            None
        """

        def ESFMPreprocessing( BBB, C, V, originalText ):
            r"""
            Converts ESFM tagging to pseudo-USFM codes for easier handling later on.

            Parameters:
                BBB, C, V parameters are used in error messages
                    and are recorded with each tag occurrence
                originalText is the text line from the file

            Returns:
                A string replacement to use instead of originalText

            Converts (see saveSemanticTag and saveStrongsTag below):
                a semantic tag (=X...) after a word or braced group into a \sem X ...\sem* field
                a Strong's tag (=SX...) after a word or underline group into a \str X ...\str* field
                i.e, braces and equal signs are removed from the text
                    and the information is placed in a character field.

            Note: Underline characters (used to join translated words) go into the
                returned text as spaces (and a space following an underline is dropped).
            """

            def saveWord( BBB, C, V, word ):
                """
                Currently only checks that the word is non-empty and contains no spaces
                    (nothing is stored).
                """
                assert( word and ' ' not in word  )
            # end of saveWord

            def saveSemanticTag( BBB, C, V, word, tag ):
                """
                Records this occurrence of a semantic tag in the container's semanticDict
                    (under 'Tag errors' or 'Missing' if the tag or its content is unknown).

                The tag is '=' followed by the tag marker character and the (optional) tag content;
                    if there's no tag content, the word itself is used.

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                assert( word and ' ' not in word  )
                assert( tag and tag[0]=='=' and len(tag)>=2 )
                tagMarker, tagContent = tag[1], tag[2:]

                thisDict = self.containerBibleObject.semanticDict
                if tagMarker not in ESFM_SEMANTIC_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {} tag content {}").format( self.BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {} tag in {}".format( BBB, C, V, repr(tagMarker), repr(tag) ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    if 'Tag errors' not in thisDict: thisDict['Tag errors'] = []
                    thisDict['Tag errors'].append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word

                # Now look in the semantic database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {} tag content {}").format( self.BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {} tag content {}".format( BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    if 'Missing' not in thisDict: thisDict['Missing'] = {}
                    if tagMarker not in thisDict['Missing']: thisDict['Missing'][tagMarker] = {}
                    if tagContent not in thisDict['Missing'][tagMarker]: thisDict['Missing'][tagMarker][tagContent] = []
                    thisDict['Missing'][tagMarker][tagContent].append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                if word==tagContent: return "\\sem {} {}\\sem*".format( tagMarker, word )
                return "\\sem {} {}={}\\sem*".format( tagMarker, word, tagContent )
            # end of saveSemanticTag

            def saveStrongsTag( BBB, C, V, word, tag ):
                """
                Records this occurrence of a Strong's tag in the container's StrongsDict
                    (under 'Tag errors' or 'Missing' if the tag or its content is unknown).

                The tag is '=S' followed by the tag marker character and the (optional) tag content;
                    if there's no tag content, the word itself is used.

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                assert( word and ' ' not in word  )
                assert( tag and tag[0]=='=' and tag[1]=='S' and len(tag)>=3 )
                tagMarker, tagContent = tag[2], tag[3:]

                thisDict = self.containerBibleObject.StrongsDict
                if tagMarker not in ESFM_STRONGS_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {} tag content {}").format( self.BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {} tag in {}".format( BBB, C, V, repr(tagMarker), repr(tag) ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    if 'Tag errors' not in thisDict: thisDict['Tag errors'] = []
                    thisDict['Tag errors'].append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word

                # Now look in the Strongs database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisEntry = thisDict[tagMarker][tagContent]
                    if isinstance( thisEntry, str ):
                        thisDict[tagMarker][tagContent] = [thisEntry] # Convert from a string to a list with the string as the first list item
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {} tag content {}").format( self.BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {} tag content {}".format( BBB, C, V, repr(tagMarker), repr(tagContent) ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    if 'Missing' not in thisDict: thisDict['Missing'] = {}
                    if tagMarker not in thisDict['Missing']: thisDict['Missing'][tagMarker] = {}
                    if tagContent not in thisDict['Missing'][tagMarker]: thisDict['Missing'][tagMarker][tagContent] = []
                    thisDict['Missing'][tagMarker][tagContent].append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                return "\\str {} {}={}\\str*".format( tagMarker, tagContent, word )
            # end of saveStrongsTag

            # Main code for ESFMPreprocessing
            text = ''
            if 1:
                # Analyse and collect all ESFM tags and special characters, and put the results into USFM type character fields
                #   This is a character-by-character state machine:
                #       word            the current word being collected
                #       tag             the current tag being collected (always starts with '=')
                #       bracedGroup     the contents of the current {...} group (with spaces changed to underlines)
                #       underlineGroup  the words of the current under_lined group
                bracedGroupFlag = underlineGroupFlag = hangingUnderlineFlag = startsWithUnderline = False
                word = underlineGroup = bracedGroup = tag = ''
                lastChar = ''
                for j, originalChar in enumerate( originalText ):
                    char = originalChar

                    if char == ' ':
                        if lastChar == '_':
                            hangingUnderlineFlag = True
                            assert( text[-1] == ' ' )
                            text = text[:-1] # Remove the space from the underline otherwise we'll get two spaces
                        if lastChar != '_' and (not underlineGroupFlag) and (not hangingUnderlineFlag):
                            underlineGroup = ''
                    if lastChar == ' ': startsWithUnderline =  char == '_'

                    if bracedGroupFlag:
                        if char == '}': bracedGroupFlag = False
                        else: bracedGroup += char if char!=' ' else '_'
                    if tag:
                        if BibleOrgSysGlobals.debugFlag: assert( tag[0] == '=' )
                        if char in ' _=' or char in ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
                            # This character ends the tag
                            if underlineGroupFlag:
                                underlineGroup += word
                                if char == '_': underlineGroup += char
                                else: underlineGroupFlag = False
                            if len(tag) > 1:
                                if tag[1]=='S':
                                    text += saveStrongsTag( BBB, C, V, underlineGroup if underlineGroup else word, tag )
                                    underlineGroup = ''
                                    underlineGroupFlag = hangingUnderlineFlag = False
                                else:
                                    text += saveSemanticTag( BBB, C, V, bracedGroup if bracedGroup else word, tag )
                                if char == '_':
                                    if not underlineGroupFlag: # it's just starting now
                                        underlineGroup += word + char
                                        underlineGroupFlag = True
                                    char = ' ' # to go into text
                                elif char != '=': underlineGroupFlag = False
                                if char == '=': tag = char # Started a new consecutive tag
                                else:
                                    if word: saveWord( BBB, C, V, word )
                                    word = bracedGroup = tag = ''
                                    if char!='}': text += char
                            else:
                                loadErrors.append( _("{} {}:{} unexpected short ESFM tag at {}={} in {}").format( self.BBB, C, V, j, repr(originalChar), repr(originalText) ) )
                                logging.error( "ESFM tagging error in {} {}:{}: unexpected short tag at {}={} in {}".format( BBB, C, V, j, repr(originalChar), repr(originalText) ) )
                                self.addPriorityError( 21, C, V, _("Unexpected ESFM short tag") )
                        else: # still in tag
                            tag += char
                    else: # not in tag
                        if char == '=': tag = char
                        else: # still not in tag
                            if char == '{':
                                if (lastChar and lastChar!=' ') or tag or bracedGroupFlag or bracedGroup:
                                    loadErrors.append( _("{} {}:{} unexpected ESFM opening brace at {}={} in {}").format( self.BBB, C, V, j, repr(originalChar), repr(originalText) ) )
                                    logging.error( "ESFM tagging error in {} {}:{}: unexpected opening brace at {}={} in {}".format( BBB, C, V, j, repr(originalChar), repr(originalText) ) )
                                    self.addPriorityError( 20, C, V, _("Unexpected ESFM opening brace") )
                                bracedGroupFlag = True
                                char = '' # nothing to go into text
                            elif char in ' _' or char in DASH_CHARS:
                                if underlineGroupFlag:
                                    underlineGroup += word
                                    if char == '_':
                                        underlineGroup += char
                                        char = ' ' # to go into text
                                    else: underlineGroupFlag = False
                                elif char == ' ':
                                    underlineGroupFlag = False
                                    if startsWithUnderline:
                                        underlineGroup += word
                                        startsWithUnderline = False
                                elif char == '_':
                                    if hangingUnderlineFlag:
                                        char = '' # nothing to go into text
                                        hangingUnderlineFlag = False # underlineGroupFlag will be set instead below
                                    else: # not hanging underline
                                        underlineGroup += word + char
                                        char = ' ' # to go into text
                                    underlineGroupFlag = True
                                if word: saveWord( BBB, C, V, word )
                                word = ''
                            elif char!='}': word += char
                            if char!='}': text += char
                    lastChar = originalChar

            else: # TEMP: just remove all ESFM tags and special characters (disabled by the 'if 1' above)
                inTag = False
                for char in originalText:
                    if inTag:
                        if char in ' _' or char in ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
                            inTag = False
                            text += char
                    else: # not in tag
                        if char == '=': inTag = True; continue
                        text += char
                text = text.replace('{','').replace('}','').replace('_(',' ').replace(')_',' ').replace('_',' ')

            return text
        # end of ESFMBibleBook.ESFMPreprocessing


        def doaddLine( originalMarker, originalText ):
            """
            Check for newLine markers within the line (if so, break the line) and save the information in our database.

            Also convert ~ to a proper non-break space.

            Note: C and V (used in the error messages) come from the enclosing load function.
            """
            # The non-break space is written as an escape so that it can't be confused with an ordinary space
            marker, text = originalMarker, originalText.replace( '~', '\u00a0' )
            marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( originalMarker )
            if marker != originalMarker:
                loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                logging.error( _("ESFM doesn't allow the unnumbered marker after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

            if '\\' in text: # Check markers inside the lines
                markerList = BibleOrgSysGlobals.USFMMarkers.getMarkerListFromText( text )
                ix = 0 # Index of the start of the not-yet-saved part of text
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList: # check paragraph markers
                    if insideMarker == '\\': # it's a free-standing backspace
                        loadErrors.append( _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}").format( self.BBB, C, V, marker, text ) )
                        logging.error( _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                        self.addPriorityError( 100, C, V, _("Improper free-standing backspace character inside a line") )
                    elif BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(insideMarker): # Need to split the line for everything else to work properly
                        if ix==0:
                            loadErrors.append( _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}").format( self.BBB, C, V, insideMarker, marker, text ) )
                            logging.error( _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}").format( insideMarker, self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                            self.addPriorityError( 96, C, V, _("NewLine marker \\{} shouldn't be inside a line").format( insideMarker ) )
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine( marker, thisText )
                        ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar) # Get the start of the next text -- the 1 is for the backslash
                        marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( insideMarker ) # setup for the next line
                        if marker != insideMarker:
                            loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker within line \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            logging.error( _("ESFM doesn't allow the unnumbered marker within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

                if ix != 0: # We must have separated multiple lines
                    text = text[ix:] # Get the final bit of the line
            self.addLine( marker, text ) # Call the function in the base class to save the line (or the remainder of the line if we split it above)
        # end of ESFMBibleBook.doaddLine


        # Main code for load
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {}...").format( filename ) )
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
        originalBook = ESFMFile()
        originalBook.read( self.sourceFilepath )

        # Do some important cleaning up before we save the data
        C = V = '0'
        lastMarker = lastText = ''
        loadErrors = []
        for marker,originalText in originalBook.lines: # Always process a line behind in case we have to combine lines

            # Keep track of where we are for more helpful error messages
            if marker=='c' and originalText: C, V = originalText.split()[0], '0'
            elif marker=='v' and originalText:
                V = originalText.split()[0]
                if C == '0': C = '1' # Some single chapter books don't have an explicit chapter 1 marker
            elif marker=='restore': continue # Ignore these lines completely

            text = ESFMPreprocessing( self.BBB, C, V, originalText ) # Convert ESFM encoding to pseudo-USFM

            # Now load the actual Bible book data
            if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( marker ):
                if lastMarker: doaddLine( lastMarker, lastText )
                lastMarker, lastText = marker, text
            elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker[:-1] ): # the line begins with an internal marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line with text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line with text: {}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 27, C, V, _("Found \\{} internal marker on new line in file").format( marker ) )
                if not lastText.endswith(' '): lastText += ' ' # Not always good to add a space, but it's their fault!
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            elif BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker[:-1] ): # the line begins with a note marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line with text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line with text: {}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 26, C, V, _("Found \\{} note marker on new line in file").format( marker ) )
                if not lastText.endswith(' ') and marker!='f': lastText += ' ' # Not always good to add a space, but it's their fault! Don't do it for footnotes, though.
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            else: # the line begins with an unknown marker
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line with text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line with text: {}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 100, C, V, _("Found \\{} unknown marker on new line in file").format( marker ) )
                for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                    if marker.startswith( tryMarker ): # Let's try changing it
                        if lastMarker: doaddLine( lastMarker, lastText )
                        lastMarker, lastText = tryMarker, marker[len(tryMarker):] + ' ' + text
                        loadErrors.append( _("{} {}:{} Changed '\\{}' unknown marker to {!r} at beginning of line: {}").format( self.BBB, C, V, marker, tryMarker, text ) )
                        logging.warning( _("Changed '\\{}' unknown marker to {!r} after {} {}:{} at beginning of line: {}").format( marker, tryMarker, self.BBB, C, V, text ) )
                        break
                # Otherwise, don't bother processing this line -- it'll just cause more problems later on

        # This check has to come BEFORE the final doaddLine below,
        #   otherwise the placeholder line for an empty file would never be saved
        if not originalBook.lines: # There were no lines!!!
            loadErrors.append( _("{} This ESFM file was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            logging.error( _("ESFM file for {} was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            lastMarker, lastText = 'rem', 'This (ESFM) file was completely empty' # Save something since we had a file at least

        if lastMarker: doaddLine( lastMarker, lastText ) # Process the final line

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        if 0 and BibleOrgSysGlobals.debugFlag and self.BBB=='JNA': # Disabled debugging code
            for name,thisDict in  ( ('SEM',self.containerBibleObject.semanticDict), ('STR',self.containerBibleObject.StrongsDict) ):
                if 'Tag errors' in thisDict:
                    print( "\n{} Tag errors: {}".format( name, thisDict['Tag errors'] ) )
                if 'Missing' in thisDict:
                    print( "\n{} Missing: {}".format( name, thisDict['Missing'] ) )
                if thisDict == self.containerBibleObject.semanticDict:
                    for tag in ESFM_SEMANTIC_TAGS:
                        if tag in thisDict:
                            print( "\n{} Found {}: {}".format( name, tag, thisDict[tag] ) )
                elif thisDict == self.containerBibleObject.StrongsDict:
                    for tag in ESFM_STRONGS_TAGS:
                        for num in thisDict[tag]:
                            if isinstance( thisDict[tag][num], list ):
                                print( "\n{} Found {} {}: {}".format( name, tag, num, thisDict[tag][num] ) )
            halt # Undefined name -- stops the program (with a NameError) if this debugging code is ever enabled
Ejemplo n.º 6
0
    def load(self, filename, folder=None):
        """
        Load the ESFM Bible book from a file.

        Tries to combine physical lines into logical lines,
            i.e., so that all lines begin with a ESFM paragraph marker.

        Uses the addLine function of the base class to save the lines.

        Parameters:
            filename is the name of the ESFM file to read
            folder (optional) is joined in front of filename to make the file path

        Side effects:
            Sets self.sourceFilename, self.sourceFolder and self.sourceFilepath.
            Saves any problems that were found in self.errorDictionary['Load Errors'].
            Records the tags that were found (or were unknown/missing) in the
                semanticDict and StrongsDict of self.containerBibleObject.

        Note: the base class later on will try to break apart lines with a paragraph marker in the middle --
                we don't need to worry about that here.
        """
        def ESFMPreprocessing(BBB, C, V, originalText):
            r"""
            Converts ESFM tagging to pseudo-USFM codes for easier handling later on.

            Parameters:
                BBB, C, V parameters are just for use in error messages
                originalText is the text line from the file

            Returns:
                A string replacement to use instead of originalText

            Converts (once the tag is ended by a space, underline, equals sign or punctuation):
                a semantic tag to a \sem field,
                    e.g., "{the three lepers}=PMat6Lepers" to "the three lepers\sem P the_three_lepers=Mat6Lepers\sem*"
                a Strong's tag (one starting with =S) to a \str field,
                    e.g., "word=SG1234" to "word\str G 1234=word\str*"
                i.e, braces and equal signs are removed from the text
                    and the information is placed in a character field.

            Underlines/underscores joining words are replaced by spaces in the returned text.

            NOTE(review): a tag is only saved when a following delimiter is read,
                so a tag which is the very last thing in originalText looks like
                it would be silently dropped -- TODO confirm with real data.
            """
            def saveWord(BBB, C, V, word):
                """
                Currently only checks that the word is non-empty and contains no spaces.
                """
                assert (word and ' ' not in word)

            # end of saveWord

            def saveSemanticTag(BBB, C, V, word, tag):
                """
                Records the use of a semantic tag in the semantic dictionary of the container Bible
                    (or in its 'Tag errors' / 'Missing' entries if the tag isn't known).

                Parameters:
                    BBB, C, V are used in error messages and saved with the reference
                    word is the tagged word (or braced/underlined group) and may not contain spaces
                    tag is the complete tag starting with the equals sign, e.g., '=PMat6Lepers'

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                assert (word and ' ' not in word)
                assert (tag and tag[0] == '=' and len(tag) >= 2)
                # The first character after the = is the tag type -- the rest is the content
                tagMarker, tagContent = tag[1], tag[2:]

                thisDict = self.containerBibleObject.semanticDict
                if tagMarker not in ESFM_SEMANTIC_TAGS:
                    loadErrors.append(
                        _("{} {}:{} unknown ESFM {} tag content {}").format(
                            self.BBB, C, V, repr(tagMarker), repr(tagContent)))
                    logging.error(
                        "ESFM tagging error in {} {}:{}: unknown {} tag in {}".
                        format(BBB, C, V, repr(tagMarker), repr(tag)))
                    self.addPriorityError(15, C, V,
                                          _("Unknown ESFM semantic tag"))
                    if 'Tag errors' not in thisDict:
                        thisDict['Tag errors'] = []
                    thisDict['Tag errors'].append((BBB, C, V, tag[1:]))
                # A tag with no content means that the word itself is the content
                if not tagContent: tagContent = word

                # Now look in the semantic database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisDict[tagMarker][tagContent].append((BBB, C, V, word))
                else:  # couldn't find it
                    loadErrors.append(
                        _("{} {}:{} unknown ESFM {} tag content {}").format(
                            self.BBB, C, V, repr(tagMarker), repr(tagContent)))
                    logging.error(
                        "ESFM tagging error in {} {}:{}: unknown {} tag content {}"
                        .format(BBB, C, V, repr(tagMarker), repr(tagContent)))
                    self.addPriorityError(15, C, V,
                                          _("Unknown ESFM semantic tag"))
                    # Remember where the missing entry was used
                    if 'Missing' not in thisDict: thisDict['Missing'] = {}
                    if tagMarker not in thisDict['Missing']:
                        thisDict['Missing'][tagMarker] = {}
                    if tagContent not in thisDict['Missing'][tagMarker]:
                        thisDict['Missing'][tagMarker][tagContent] = []
                    thisDict['Missing'][tagMarker][tagContent].append((
                        BBB, C, V) if word == tagContent else (BBB, C, V,
                                                               word))

                if word == tagContent:
                    return "\\sem {} {}\\sem*".format(tagMarker, word)
                return "\\sem {} {}={}\\sem*".format(tagMarker, word,
                                                     tagContent)

            # end of saveSemanticTag

            def saveStrongsTag(BBB, C, V, word, tag):
                """
                Records the use of a Strong's tag in the Strong's dictionary of the container Bible
                    (or in its 'Tag errors' / 'Missing' entries if the tag isn't known).

                Parameters:
                    BBB, C, V are used in error messages and saved with the reference
                    word is the tagged word (or underlined group) and may not contain spaces
                    tag is the complete tag starting with '=S'

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                assert (word and ' ' not in word)
                assert (tag and tag[0] == '=' and tag[1] == 'S'
                        and len(tag) >= 3)
                # The character after the =S is the tag type -- the rest is the content
                tagMarker, tagContent = tag[2], tag[3:]

                thisDict = self.containerBibleObject.StrongsDict
                if tagMarker not in ESFM_STRONGS_TAGS:
                    loadErrors.append(
                        _("{} {}:{} unknown ESFM {} tag content {}").format(
                            self.BBB, C, V, repr(tagMarker), repr(tagContent)))
                    logging.error(
                        "ESFM tagging error in {} {}:{}: unknown {} tag in {}".
                        format(BBB, C, V, repr(tagMarker), repr(tag)))
                    self.addPriorityError(10, C, V,
                                          _("Unknown ESFM Strong's tag"))
                    if 'Tag errors' not in thisDict:
                        thisDict['Tag errors'] = []
                    thisDict['Tag errors'].append((BBB, C, V, tag[1:]))
                # A tag with no content means that the word itself is the content
                if not tagContent: tagContent = word

                # Now look in the Strongs database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisEntry = thisDict[tagMarker][tagContent]
                    if isinstance(thisEntry, str):
                        thisDict[tagMarker][tagContent] = [
                            thisEntry
                        ]  # Convert from a string to a list with the string as the first list item
                    thisDict[tagMarker][tagContent].append((BBB, C, V, word))
                else:  # couldn't find it
                    loadErrors.append(
                        _("{} {}:{} unknown ESFM {} tag content {}").format(
                            self.BBB, C, V, repr(tagMarker), repr(tagContent)))
                    logging.error(
                        "ESFM tagging error in {} {}:{}: unknown {} tag content {}"
                        .format(BBB, C, V, repr(tagMarker), repr(tagContent)))
                    self.addPriorityError(10, C, V,
                                          _("Unknown ESFM Strong's tag"))
                    # Remember where the missing entry was used
                    if 'Missing' not in thisDict: thisDict['Missing'] = {}
                    if tagMarker not in thisDict['Missing']:
                        thisDict['Missing'][tagMarker] = {}
                    if tagContent not in thisDict['Missing'][tagMarker]:
                        thisDict['Missing'][tagMarker][tagContent] = []
                    thisDict['Missing'][tagMarker][tagContent].append((
                        BBB, C, V) if word == tagContent else (BBB, C, V,
                                                               word))

                return "\\str {} {}={}\\str*".format(tagMarker, tagContent,
                                                     word)

            # end of saveStrongsTag

            # Main code for ESFMPreprocessing
            text = ''
            if 1:
                # Analyse and collect all ESFM tags and special characters, and put the results into USFM type character fields
                # This is a character by character state machine:
                #   word is the current word being collected
                #   tag is the current tag being collected (including its leading equals sign)
                #   bracedGroup is the content of a {braced group} with spaces changed to underlines
                #   underlineGroup is the collection of words joined by underlines
                bracedGroupFlag = underlineGroupFlag = hangingUnderlineFlag = startsWithUnderline = False
                word = underlineGroup = bracedGroup = tag = ''
                lastChar = ''
                for j, originalChar in enumerate(originalText):
                    char = originalChar  # char might get changed below before it's added to text

                    if char == ' ':
                        if lastChar == '_':
                            hangingUnderlineFlag = True
                            # Remove the space from the underline otherwise we'll get two spaces
                            # (Malformed tagging can leave text without that space,
                            #   so this is checked rather than asserted)
                            if text.endswith(' '):
                                text = text[:-1]
                        if lastChar != '_' and (not underlineGroupFlag) and (
                                not hangingUnderlineFlag):
                            underlineGroup = ''
                    if lastChar == ' ': startsWithUnderline = char == '_'

                    if bracedGroupFlag:
                        if char == '}': bracedGroupFlag = False
                        else: bracedGroup += char if char != ' ' else '_'
                    if tag:
                        if BibleOrgSysGlobals.debugFlag: assert (tag[0] == '=')
                        if char in ' _=' or char in ALL_WORD_PUNCT_CHARS:  # Note: A forward slash is permitted
                            # This character ends the tag
                            if underlineGroupFlag:
                                underlineGroup += word
                                if char == '_': underlineGroup += char
                                else: underlineGroupFlag = False
                            if len(tag) > 1:
                                if tag[1] == 'S':
                                    text += saveStrongsTag(
                                        BBB, C, V, underlineGroup
                                        if underlineGroup else word, tag)
                                    underlineGroup = ''
                                    underlineGroupFlag = hangingUnderlineFlag = False
                                else:
                                    text += saveSemanticTag(
                                        BBB, C, V,
                                        bracedGroup if bracedGroup else word,
                                        tag)
                                if char == '_':
                                    if not underlineGroupFlag:  # it's just starting now
                                        underlineGroup += word + char
                                        underlineGroupFlag = True
                                    char = ' '  # to go into text
                                elif char != '=':
                                    underlineGroupFlag = False
                                if char == '=':
                                    tag = char  # Started a new consecutive tag
                                else:
                                    if word: saveWord(BBB, C, V, word)
                                    word = bracedGroup = tag = ''
                                    if char != '}': text += char
                            else:  # there's only the equals sign
                                loadErrors.append(
                                    _("{} {}:{} unexpected short ESFM tag at {}={} in {}"
                                      ).format(self.BBB, C, V, j,
                                               repr(originalChar),
                                               repr(originalText)))
                                logging.error(
                                    "ESFM tagging error in {} {}:{}: unexpected short tag at {}={} in {}"
                                    .format(BBB, C, V, j, repr(originalChar),
                                            repr(originalText)))
                                self.addPriorityError(
                                    21, C, V, _("Unexpected ESFM short tag"))
                        else:  # still in tag
                            tag += char
                    else:  # not in tag
                        if char == '=': tag = char
                        else:  # still not in tag
                            if char == '{':
                                if (lastChar and lastChar != ' '
                                    ) or tag or bracedGroupFlag or bracedGroup:
                                    loadErrors.append(
                                        _("{} {}:{} unexpected ESFM opening brace at {}={} in {}"
                                          ).format(self.BBB, C, V, j,
                                                   repr(originalChar),
                                                   repr(originalText)))
                                    logging.error(
                                        "ESFM tagging error in {} {}:{}: unexpected opening brace at {}={} in {}"
                                        .format(BBB, C, V, j,
                                                repr(originalChar),
                                                repr(originalText)))
                                    self.addPriorityError(
                                        20, C, V,
                                        _("Unexpected ESFM opening brace"))
                                bracedGroupFlag = True
                                char = ''  # nothing to go into text
                            elif char in ' _' or char in DASH_CHARS:
                                # This character ends the current word
                                if underlineGroupFlag:
                                    underlineGroup += word
                                    if char == '_':
                                        underlineGroup += char
                                        char = ' '  # to go into text
                                    else:
                                        underlineGroupFlag = False
                                elif char == ' ':
                                    underlineGroupFlag = False
                                    if startsWithUnderline:
                                        underlineGroup += word
                                        startsWithUnderline = False
                                elif char == '_':
                                    if hangingUnderlineFlag:
                                        char = ''  # nothing to go into text
                                        hangingUnderlineFlag = False  # underlineGroupFlag will be set instead below
                                    else:  # not hanging underline
                                        underlineGroup += word + char
                                        char = ' '  # to go into text
                                    underlineGroupFlag = True
                                if word: saveWord(BBB, C, V, word)
                                word = ''
                            elif char != '}':
                                word += char
                            if char != '}': text += char
                    lastChar = originalChar

            else:  # TEMP: just remove all ESFM tags and special characters
                # (This branch is disabled by the 'if 1' above)
                inTag = False
                for char in originalText:
                    if inTag:
                        if char in ' _' or char in ALL_WORD_PUNCT_CHARS:  # Note: A forward slash is permitted
                            inTag = False
                            text += char
                    else:  # not in tag
                        if char == '=':
                            inTag = True
                            continue
                        text += char
                text = text.replace('{', '').replace('}', '').replace(
                    '_(', ' ').replace(')_', ' ').replace('_', ' ')

            return text

        # end of ESFMBibleBook.ESFMPreprocessing

        def doaddLine(originalMarker, originalText):
            """
            Check for newLine markers within the line (if so, break the line) and save the information in our database.

            Also convert ~ to a proper non-break space.

            Parameters:
                originalMarker is the marker from the start of the (combined) line
                originalText is the text of the line (after ESFM preprocessing)

            Uses C and V from the enclosing load function for the error messages.
            """
            # An explicit escape is used so that the non-break space can't be confused with a normal space
            text = originalText.replace('~', '\u00a0')
            marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker(
                originalMarker)
            if marker != originalMarker:
                loadErrors.append(
                    _("{} {}:{} ESFM doesn't allow unnumbered marker \\{}: {!r}"
                      ).format(self.BBB, C, V, originalMarker, originalText))
                logging.error(
                    _("ESFM doesn't allow the unnumbered marker after {} {}:{} in \\{}: {!r}"
                      ).format(self.BBB, C, V, originalMarker, originalText))
                self.addPriorityError(
                    90, C, V, _("ESFM doesn't allow unnumbered markers"))

            if '\\' in text:  # Check markers inside the lines
                markerList = BibleOrgSysGlobals.USFMMarkers.getMarkerListFromText(
                    text)
                ix = 0  # Index into text of the part that hasn't been saved yet
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList:  # check paragraph markers
                    if insideMarker == '\\':  # it's a free-standing backslash
                        loadErrors.append(
                            _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}"
                              ).format(self.BBB, C, V, marker, text))
                        logging.error(
                            _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}"
                              ).format(self.BBB, C, V, marker, text)
                        )  # Only log the first error in the line
                        self.addPriorityError(
                            100, C, V,
                            _("Improper free-standing backspace character inside a line"
                              ))
                    elif BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(
                            insideMarker
                    ):  # Need to split the line for everything else to work properly
                        if ix == 0:
                            loadErrors.append(
                                _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}"
                                  ).format(self.BBB, C, V, insideMarker,
                                           marker, text))
                            logging.error(
                                _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}"
                                  ).format(insideMarker, self.BBB, C, V,
                                           marker, text)
                            )  # Only log the first error in the line
                            self.addPriorityError(
                                96, C, V,
                                _("NewLine marker \\{} shouldn't be inside a line"
                                  ).format(insideMarker))
                        # Save the part of the line before this marker
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine(marker, thisText)
                        ix = iMIndex + 1 + len(insideMarker) + len(
                            nextSignificantChar
                        )  # Get the start of the next text -- the 1 is for the backslash
                        marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker(
                            insideMarker)  # setup for the next line
                        if marker != insideMarker:
                            loadErrors.append(
                                _("{} {}:{} ESFM doesn't allow unnumbered marker within line \\{}: {!r}"
                                  ).format(self.BBB, C, V, insideMarker,
                                           originalText))
                            logging.error(
                                _("ESFM doesn't allow the unnumbered marker within line after {} {}:{} in \\{}: {!r}"
                                  ).format(self.BBB, C, V, insideMarker,
                                           originalText))
                            self.addPriorityError(
                                90, C, V,
                                _("ESFM doesn't allow unnumbered markers"))

                if ix != 0:  # We must have separated multiple lines
                    text = text[ix:]  # Get the final bit of the line
            self.addLine(
                marker, text
            )  # Call the function in the base class to save the line (or the remainder of the line if we split it above)

        # end of ESFMBibleBook.doaddLine

        # Main code for load
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print("  " + _("Loading {}...").format(filename))
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join(folder,
                                           filename) if folder else filename
        originalBook = ESFMFile()
        originalBook.read(self.sourceFilepath)

        # Do some important cleaning up before we save the data
        C = V = '0'
        lastMarker = lastText = ''
        loadErrors = []
        for marker, originalText in originalBook.lines:  # Always process a line behind in case we have to combine lines

            # Keep track of where we are for more helpful error messages
            if marker == 'c' and originalText:
                C, V = originalText.split()[0], '0'
            elif marker == 'v' and originalText:
                V = originalText.split()[0]
                if C == '0':
                    C = '1'  # Some single chapter books don't have an explicit chapter 1 marker
            elif marker == 'restore':
                continue  # Ignore these lines completely

            text = ESFMPreprocessing(
                self.BBB, C, V,
                originalText)  # Convert ESFM encoding to pseudo-USFM

            # Now load the actual Bible book data
            if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(marker):
                # Save the previous (possibly combined) line and start collecting a new one
                if lastMarker: doaddLine(lastMarker, lastText)
                lastMarker, lastText = marker, text
            elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker[:-1] ): # the line begins with an internal marker -- append it to the previous line
                if text:
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' internal marker at beginning of line with text: {}"
                          ).format(self.BBB, C, V, marker, text))
                    logging.warning(
                        _("Found '\\{}' internal marker after {} {}:{} at beginning of line with text: {}"
                          ).format(marker, self.BBB, C, V, text))
                else:  # no text
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' internal marker at beginning of line (with no text)"
                          ).format(self.BBB, C, V, marker))
                    logging.warning(
                        _("Found '\\{}' internal marker after {} {}:{} at beginning of line (with no text)"
                          ).format(marker, self.BBB, C, V))
                self.addPriorityError(
                    27, C, V,
                    _("Found \\{} internal marker on new line in file").format(
                        marker))
                if not lastText.endswith(' '):
                    lastText += ' '  # Not always good to add a space, but it's their fault!
                lastText += '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3:
                    print(
                        "{} {} {} Appended {}:{!r} to get combined line {}:{!r}"
                        .format(self.BBB, C, V, marker, text, lastMarker,
                                lastText))
            elif BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker[:-1] ): # the line begins with a note marker -- append it to the previous line
                if text:
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' note marker at beginning of line with text: {}"
                          ).format(self.BBB, C, V, marker, text))
                    logging.warning(
                        _("Found '\\{}' note marker after {} {}:{} at beginning of line with text: {}"
                          ).format(marker, self.BBB, C, V, text))
                else:  # no text
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' note marker at beginning of line (with no text)"
                          ).format(self.BBB, C, V, marker))
                    logging.warning(
                        _("Found '\\{}' note marker after {} {}:{} at beginning of line (with no text)"
                          ).format(marker, self.BBB, C, V))
                self.addPriorityError(
                    26, C, V,
                    _("Found \\{} note marker on new line in file").format(
                        marker))
                if not lastText.endswith(' ') and marker != 'f':
                    lastText += ' '  # Not always good to add a space, but it's their fault! Don't do it for footnotes, though.
                lastText += '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3:
                    print(
                        "{} {} {} Appended {}:{!r} to get combined line {}:{!r}"
                        .format(self.BBB, C, V, marker, text, lastMarker,
                                lastText))
            else:  # the line begins with an unknown marker
                if text:
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' unknown marker at beginning of line with text: {}"
                          ).format(self.BBB, C, V, marker, text))
                    logging.error(
                        _("Found '\\{}' unknown marker after {} {}:{} at beginning of line with text: {}"
                          ).format(marker, self.BBB, C, V, text))
                else:  # no text
                    loadErrors.append(
                        _("{} {}:{} Found '\\{}' unknown marker at beginning of line (with no text)"
                          ).format(self.BBB, C, V, marker))
                    logging.error(
                        _("Found '\\{}' unknown marker after {} {}:{} at beginning of line (with no text)"
                          ).format(marker, self.BBB, C, V))
                self.addPriorityError(
                    100, C, V,
                    _("Found \\{} unknown marker on new line in file").format(
                        marker))
                for tryMarker in sortedNLMarkers:  # Try to do something intelligent here -- it might be just a missing space
                    if marker.startswith(tryMarker):  # Let's try changing it
                        if lastMarker: doaddLine(lastMarker, lastText)
                        # The rest of the unknown marker is treated as the start of the text
                        lastMarker, lastText = tryMarker, marker[
                            len(tryMarker):] + ' ' + text
                        loadErrors.append(
                            _("{} {}:{} Changed '\\{}' unknown marker to {!r} at beginning of line: {}"
                              ).format(self.BBB, C, V, marker, tryMarker,
                                       text))
                        logging.warning(
                            _("Changed '\\{}' unknown marker to {!r} after {} {}:{} at beginning of line: {}"
                              ).format(marker, tryMarker, self.BBB, C, V,
                                       text))
                        break
                # Otherwise, don't bother processing this line -- it'll just cause more problems later on
        if lastMarker:
            doaddLine(lastMarker, lastText)  # Process the final line

        if not originalBook.lines:  # There were no lines!!!
            loadErrors.append(
                _("{} This ESFM file was totally empty: {}").format(
                    self.BBB, self.sourceFilename))
            logging.error(
                _("ESFM file for {} was totally empty: {}").format(
                    self.BBB, self.sourceFilename))
            # Save something since we had a file at least
            #   (This has to be added here because the final line was already processed above)
            doaddLine('rem', 'This (ESFM) file was completely empty')

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        # The following debugging code is disabled by the leading 0
        if 0 and BibleOrgSysGlobals.debugFlag and self.BBB == 'JNA':
            for name, thisDict in (('SEM',
                                    self.containerBibleObject.semanticDict),
                                   ('STR',
                                    self.containerBibleObject.StrongsDict)):
                if 'Tag errors' in thisDict:
                    print("\n{} Tag errors: {}".format(name,
                                                       thisDict['Tag errors']))
                if 'Missing' in thisDict:
                    print("\n{} Missing: {}".format(name, thisDict['Missing']))
                if thisDict == self.containerBibleObject.semanticDict:
                    for tag in ESFM_SEMANTIC_TAGS:
                        if tag in thisDict:
                            print("\n{} Found {}: {}".format(
                                name, tag, thisDict[tag]))
                elif thisDict == self.containerBibleObject.StrongsDict:
                    for tag in ESFM_STRONGS_TAGS:
                        for num in thisDict[tag]:
                            if isinstance(thisDict[tag][num], list):
                                print("\n{} Found {} {}: {}".format(
                                    name, tag, num, thisDict[tag][num]))
            halt
Ejemplo n.º 7
0
    def load( self, filename, folder=None ):
        """
        Load the ESFM Bible book from a file.

        Tries to combine physical lines into logical lines,
            i.e., so that all lines begin with a ESFM paragraph marker.

        Uses the addLine function of the base class to save the lines.

        Parameters:
            filename is the name of the ESFM file to read
            folder is the (optional) folder containing the file
                -- if it's not given, filename is used as the complete path

        Side effects:
            Sets self.sourceFilename, self.sourceFolder, and self.sourceFilepath.
            Saves every logical line with self.addLine().
            Reports problems with self.addPriorityError() and the logging module,
                and (if there were any) saves the list of messages in
                self.errorDictionary['Load Errors'].
            Adds entries to the semanticDict and StrongsDict of self.containerBibleObject
                for the ESFM tags which are found.

        Note: the base class later on will try to break apart lines with a paragraph marker in the middle --
                we don't need to worry about that here.
        """
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            print( "ESFM.load( {}, {} )".format( filename, folder ) )


        def ESFMPreprocessing( BBB, C, V, marker, originalText ):
            r"""
            Converts ESFM tagging to pseudo-USFM codes for easier handling later on.

            Parameters:
                BBB, C, V parameters are just for use in error messages
                    (and in the entries saved in the tag dictionaries)
                marker is the SFM marker of the line
                originalText is the text line from the file

            Returns:
                A string replacement to use instead of originalText

            Converts:
                Semantic tags (e.g., '=PJonah' or '=G') to \sem … \sem* fields (see saveSemanticTag)
                Strong's tags (e.g., '=SH430') to \str … \str* fields (see saveStrongsTag)
                i.e, braces and equal signs are removed from the text
                    and the information is placed in a character field.

            Note: This DOESN'T remove the underline/underscore characters used to join translated words
                which were one word in the original, e.g., went_down

            Note: This is a raw docstring because it contains backslashes.
            """
            if (debuggingThisModule or BibleOrgSysGlobals.debugFlag) \
            and len(originalText)>5: # Don't display for "blank" lines (like '\v 10 ')
                print( "\n\nESFMPreprocessing( {} {}:{}, {}, {!r} )".format( BBB, C, V, marker, originalText ) )


            def saveWord( BBB, C, V, word ):
                """
                Checks a single (untagged) word.

                Currently this only asserts that the word is not empty and contains no spaces
                    -- nothing is actually saved.
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveWord( {}, {}:{}, {!r} )".format( BBB, C, V, word ) )
                assert word and ' ' not in word
            # end of saveWord

            def saveSemanticTag( BBB, C, V, word, tag ):
                """
                Fills the semantic dictionary with keys:
                    'Tag errors': contains a list of 4-tuples (BBB,C,V,errorWord)
                    'Missing': contains a dictionary
                    'A' 'G' 'L' 'O' 'P' 'Q' entries each containing a dictionary
                        where the key is the name (e.g., 'Jonah')
                        and the entry is a list of 4-tuples (BBB,C,V,actualWord)

                Parameters:
                    word is the tagged word (or braced group joined with underlines)
                    tag is the complete tag including the leading equals sign, e.g., '=PJonah'
                        (if there's nothing after the tag letter, the word itself is used as the content)

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveSemanticTag( {} {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
                assert word and ' ' not in word
                assert tag and tag[0]=='=' and len(tag)>=2
                tagMarker, tagContent = tag[1], tag[2:]

                thisDict = self.containerBibleObject.semanticDict
                if tagMarker not in ESFM_SEMANTIC_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    thisDict.setdefault( 'Tag errors', [] ).append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word

                # Now look in the semantic database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    # Remember what was missing (the word is only included if it differs from the tag content)
                    thisDict.setdefault( 'Missing', {} ).setdefault( tagMarker, {} ).setdefault( tagContent, [] ) \
                            .append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                if word==tagContent:
                    return "\\sem {} {}\\sem*".format( tagMarker, word )
                return "\\sem {} {}={}\\sem*".format( tagMarker, word, tagContent )
            # end of saveSemanticTag


            def saveStrongsTag( BBB, C, V, word, tag ):
                """
                Saves the reference for a Strong's tag in the Strong's dictionary,
                    which uses the same 'Tag errors' and 'Missing' keys as the semantic dictionary.
                    If the existing entry for a number is a string,
                        it's converted to a list (with that string as the first item)
                        before the 4-tuple (BBB,C,V,word) is appended.

                Parameters:
                    word is the tagged word (or underline group)
                    tag is the complete tag including the leading equals sign and S, e.g., '=SH430'

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveStrongsTag( {}, {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
                assert word and ' ' not in word
                assert tag and tag[0]=='=' and tag[1]=='S' and len(tag)>=3
                tagMarker, tagContent = tag[2], tag[3:]

                thisDict = self.containerBibleObject.StrongsDict
                if tagMarker not in ESFM_STRONGS_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    thisDict.setdefault( 'Tag errors', [] ).append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word

                # Now look in the Strongs database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisEntry = thisDict[tagMarker][tagContent]
                    if isinstance( thisEntry, str ):
                        thisDict[tagMarker][tagContent] = [thisEntry] # Convert from a string to a list with the string as the first list item
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    # Remember what was missing (the word is only included if it differs from the tag content)
                    thisDict.setdefault( 'Missing', {} ).setdefault( tagMarker, {} ).setdefault( tagContent, [] ) \
                            .append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                return "\\str {} {}={}\\str*".format( tagMarker, tagContent, word )
            # end of saveStrongsTag


            # Main code for ESFMPreprocessing
            # Analyse and collect all ESFM tags and special characters,
            #    and put the results into USFM type character fields
            bracedGroupFlag = underlineGroupFlag = startsWithUnderline = False
            word = underlineGroup = bracedGroupText = tagText = ''
            # The tag is the bit starting with =, e.g., '=PJonah'
            hangingUnderlineCount = 0 # Count of unclosed '…_ ' sequences
            lastChar = ''
            resultText = ''
            firstWordFlag = True
            for j, originalChar in enumerate( originalText ):
                char = originalChar # char may be altered below before it goes into resultText

                # Handle hanging underlines, e.g., 'and_ ' or ' _then' or 'and_ they_ _were_ _not _ashamed'
                if char == ' ':
                    if lastChar == '_':
                        hangingUnderlineCount += 1
                        assert hangingUnderlineCount < 3
                    if lastChar != '_' and (not underlineGroupFlag) and hangingUnderlineCount!=0:
                        underlineGroup = ''
                elif char == '_':
                    if lastChar == ' ':
                        hangingUnderlineCount -= 1
                        if hangingUnderlineCount < 0:
                            loadErrors.append( _("{} {}:{} missing first part of ESFM underline group at position {}").format( self.BBB, C, V, j ) )
                            logging.error( "ESFM underlining error at {} in {} {}:{}".format( j, BBB, C, V ) )
                            self.addPriorityError( 10, C, V, _("Missing first part of ESFM underline group") )
                            hangingUnderlineCount = 0 # recover

                # Collect the contents of a braced group (with spaces changed to underlines)
                if bracedGroupFlag:
                    if char == '}': bracedGroupFlag = False
                    else: bracedGroupText += '_' if char==' ' else char

                # Handle formation of output string but with tagged text converted into internal SFM fields
                #     e.g., 'And_ Elohim=G=SH430 _said=SH559:'
                #   becomes 'And_ Elohim\sem G Elohim\sem*\str H 430=Elohim\str* _said\str H 559=said\str*:'
                if tagText:
                    if BibleOrgSysGlobals.strictCheckingFlag or BibleOrgSysGlobals.debugFlag: assert tagText[0] == '='
                    if char in ' _=' or char in BibleOrgSysGlobals.ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
                        # This character ends the current tag
                        if underlineGroupFlag:
                            underlineGroup += word
                            if char == '_': underlineGroup += char
                            else: underlineGroupFlag = False
                        if len(tagText) > 1:
                            if tagText[1]=='S':
                                resultText += saveStrongsTag( BBB, C, V, underlineGroup if underlineGroup else word, tagText )
                                underlineGroup = ''
                                underlineGroupFlag = False
                            elif bracedGroupText or word:
                                resultText += saveSemanticTag( BBB, C, V, bracedGroupText if bracedGroupText else word, tagText )
                            else: # WEB Luke 16:7 contains a footnote: \f + \ft 100 cors = about 2,110 liters or 600 bushels.\f*
                                logging.critical( "Something funny with special symbol {!r} at {} {}:{}".format( char, BBB, C, V ) )
                                if BibleOrgSysGlobals.debugFlag or debuggingThisModule: halt # Deliberately undefined name -- stops the program when debugging
                            if char == '_':
                                if not underlineGroupFlag: # it's just starting now
                                    underlineGroup += word + char
                                    underlineGroupFlag = True
                                char = ' ' # to go into resultText
                            elif char != '=': underlineGroupFlag = False
                            if char == '=': tagText = char # Started a new consecutive tag
                            else:
                                if word:
                                    saveWord( BBB, C, V, word )
                                    firstWordFlag = False
                                word = bracedGroupText = tagText = ''
                                if char!='}': resultText += char
                        else:
                            loadErrors.append( _("{} {}:{} unexpected short ESFM tag at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
                            logging.error( "ESFM tagging error in {} {}:{}: unexpected short tag at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
                            self.addPriorityError( 21, C, V, _("Unexpected ESFM short tag") )
                    else: # still in tag
                        tagText += char
                else: # not in tag
                    if char == '=':
                        assert not tagText
                        tagText = char
                    else: # still not in tag
                        if char == '{':
                            if (lastChar and lastChar!=' ') or tagText or bracedGroupFlag or bracedGroupText:
                                loadErrors.append( _("{} {}:{} unexpected ESFM opening brace at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
                                logging.error( "ESFM tagging error in {} {}:{}: unexpected opening brace at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
                                self.addPriorityError( 20, C, V, _("Unexpected ESFM opening brace") )
                            bracedGroupFlag = True
                            char = '' # nothing to go into resultText
                        elif char in ' _' or char in BibleOrgSysGlobals.DASH_CHARS:
                            if underlineGroupFlag:
                                underlineGroup += word
                                if char == '_':
                                    underlineGroup += char
                                else: underlineGroupFlag = False
                            elif char == ' ':
                                underlineGroupFlag = False
                                if startsWithUnderline: # NOTE(review): startsWithUnderline is never set to True in this function
                                    underlineGroup += word
                                    startsWithUnderline = False
                            elif char == '_':
                                if hangingUnderlineCount > 0:
                                    pass # underlineGroupFlag will be set below
                                else: # not hanging underline
                                    underlineGroup += word + char
                                underlineGroupFlag = True
                            if word:
                                if marker == 'v' and not firstWordFlag:
                                    saveWord( BBB, C, V, word )
                                firstWordFlag = False
                            word = ''
                        elif char!='}': word += char
                        if char!='}': resultText += char
                lastChar = originalChar
            # NOTE(review): a tag which runs right to the end of originalText (with no following space or punctuation)
            #   is still sitting in tagText here and doesn't get put into resultText -- confirm whether that's intended

            if debuggingThisModule and resultText != originalText:
                print( "from: {!r}".format( originalText ) )
                print( " got: {!r}".format( resultText ) )
                #assert originalText.count('_') == resultText.count('_') Not necessarily true
            # The flags are parenthesised so that the text is only displayed if it contains ESFM special characters
            elif (BibleOrgSysGlobals.strictCheckingFlag or (BibleOrgSysGlobals.debugFlag and debuggingThisModule)) \
            and ('{'  in originalText or '}' in originalText or '=' in originalText):
                print( "original:", repr(originalText) )
                print( "returned:", repr(resultText) )

            return resultText
        # end of ESFMBibleBook.ESFMPreprocessing


        def doaddLine( originalMarker, originalText ):
            """
            Check for newLine markers within the line (if so, break the line)
                and save the information in our database.

            Also checks for matching underlines.

            Also convert ~ to a proper non-break space.

            Uses C and V from the enclosing load function (for the error messages only).
            """
            text = originalText.replace( '~', '\u00A0' ) # Written as an escape so that the non-break space is visible here
            marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( originalMarker )
            if marker != originalMarker:
                loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                logging.error( _("ESFM doesn't allow the unnumbered marker after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

            if '\\' in text: # Check markers inside the lines
                markerList = BibleOrgSysGlobals.USFMMarkers.getMarkerListFromText( text )
                ix = 0 # Index of the start of the text which hasn't been saved yet
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList: # check paragraph markers
                    if insideMarker == '\\': # it's a free-standing backslash (called a "backspace" in the messages)
                        loadErrors.append( _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}").format( self.BBB, C, V, marker, text ) )
                        logging.error( _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                        self.addPriorityError( 100, C, V, _("Improper free-standing backspace character inside a line") )
                    elif BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(insideMarker): # Need to split the line for everything else to work properly
                        if ix==0:
                            loadErrors.append( _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}").format( self.BBB, C, V, insideMarker, marker, text ) )
                            logging.error( _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}").format( insideMarker, self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                            self.addPriorityError( 96, C, V, _("NewLine marker \\{} shouldn't be inside a line").format( insideMarker ) )
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine( marker, thisText )
                        ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar) # Get the start of the next text -- the 1 is for the backslash
                        marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( insideMarker ) # setup for the next line
                        if marker != insideMarker:
                            loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker within line \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            logging.error( _("ESFM doesn't allow the unnumbered marker within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

                if ix != 0: # We must have separated multiple lines
                    text = text[ix:] # Get the final bit of the line

            if '_' in text:
                # Should this code be somewhere more general, e.g., in InternalBibleBook.py ???
                leftCount, rightCount = text.count( '_ ' ), text.count( ' _' )
                if leftCount > rightCount:
                    loadErrors.append( _("{} {}:{} Too many '_ ' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Too many '_ ' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                elif leftCount < rightCount:
                    loadErrors.append( _("{} {}:{} Too many ' _' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Too many ' _' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )

            self.addLine( marker, text ) # Call the function in the base class to save the line (or the remainder of the line if we split it above)
        # end of ESFMBibleBook.doaddLine


        # Main code for ESFMBibleBook.load
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {}…").format( filename ) )
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
        originalBook = ESFMFile()
        originalBook.read( self.sourceFilepath )

        # Do some important cleaning up before we save the data
        C, V = '-1', '-1' # So first/id line starts at -1:0
        lastMarker = lastText = ''
        loadErrors = []
        for marker,originalText in originalBook.lines: # Always process a line behind in case we have to combine lines
            # This is checked first so that it also works for lines before the first chapter marker
            if marker=='restore': continue # Ignore these lines completely

            # Keep track of where we are for more helpful error messages
            if marker=='c' and originalText: C, V = originalText.split()[0], '0'
            elif marker=='v' and originalText:
                V = originalText.split()[0]
                if C == '-1': C = '1' # Some single chapter books don't have an explicit chapter 1 marker
            elif C == '-1' and marker!='intro': V = str( int(V) + 1 ) # Count the lines before the first chapter

            # Now load the actual Bible book data
            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS:
                text = originalText
            else:
                text = ESFMPreprocessing( self.BBB, C, V, marker, originalText ) # Convert ESFM encoding to pseudo-USFM
            if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( marker ):
                if lastMarker: doaddLine( lastMarker, lastText )
                lastMarker, lastText = marker, text
            elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker[:-1] ): # the line begins with an internal marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 27, C, V, _("Found \\{} internal marker on new line in file").format( marker ) )
                if not lastText.endswith(' '): lastText += ' ' # Not always good to add a space, but it's their fault!
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            elif BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker ) \
            or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker[:-1] ): # the line begins with a note marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 26, C, V, _("Found \\{} note marker on new line in file").format( marker ) )
                if not lastText.endswith(' ') and marker!='f': lastText += ' ' # Not always good to add a space, but it's their fault! Don't do it for footnotes, though.
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            else: # the line begins with an unknown marker (ESFM doesn't allow custom markers)
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 100, C, V, _("Found \\{} unknown marker on new line in file").format( marker ) )
                for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                    if marker.startswith( tryMarker ): # Let's try changing it
                        if lastMarker: doaddLine( lastMarker, lastText )
                        lastMarker, lastText = tryMarker, marker[len(tryMarker):] + ' ' + text
                        loadErrors.append( _("{} {}:{} Changed '\\{}' unknown marker to {!r} at beginning of line: {}").format( self.BBB, C, V, marker, tryMarker, text ) )
                        logging.warning( _("Changed '\\{}' unknown marker to {!r} after {} {}:{} at beginning of line: {}").format( marker, tryMarker, self.BBB, C, V, text ) )
                        break
                # Otherwise, don't bother processing this line -- it'll just cause more problems later on
        if lastMarker: doaddLine( lastMarker, lastText ) # Process the final line

        if not originalBook.lines: # There were no lines!!!
            loadErrors.append( _("{} This ESFM file was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            logging.error( _("ESFM file for {} was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            # Save something since we had a file at least
            #   (this has to be added here because the final line was already processed above)
            doaddLine( 'rem', 'This (ESFM) file was completely empty' )

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
Ejemplo n.º 8
0
    def load( self, filename, folder=None ):
        """
        Load the ESFM Bible book from a file.

        Tries to combine physical lines into logical lines,
            i.e., so that all lines begin with a ESFM paragraph marker.

        Uses the addLine function of the base class to save the lines.

        Note: the base class later on will try to break apart lines with a paragraph marker in the middle --
                we don't need to worry about that here.

        Parameters:
            filename is the name of the ESFM file to read
            folder (optional) is joined in front of filename to form the filepath

        Side effects:
            Sets self.sourceFilename, self.sourceFolder and self.sourceFilepath.
            Saves each logical line with self.addLine().
            Reports problems with self.addPriorityError() and, if any load errors were
                collected, stores them in self.errorDictionary['Load Errors'].
            Updates the semanticDict and StrongsDict of self.containerBibleObject
                as tagged words are found.
        """
        if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
            print( "ESFM.load( {}, {} )".format( filename, folder ) )


        def ESFMPreprocessing( BBB, C, V, marker, originalText ):
            r"""
            Converts ESFM tagging to pseudo-USFM codes for easier handling later on.

            Parameters:
                BBB, C, V parameters are just for use in error messages
                marker is the marker of the line being processed
                originalText is the text line from the file

            Returns:
                A string replacement to use instead of originalText

            Converts:
                XXX=PYYYY to \dic PXXX=YYY\dic*
                    e.g., "{the three lepers}=PMat6Lepers" to "the three lepers\dic Pthe_three_lepers=Mat6lepers\dic*"
                i.e, braces and equal signs are removed from the text
                    and the information is placed in a \dic field.

            Note: This DOESN'T remove the underline/underscore characters used to join translated words
                which were one word in the original, e.g., went_down
            """
            if (debuggingThisModule or BibleOrgSysGlobals.debugFlag) \
            and len(originalText)>5: # Don't display for "blank" lines (like '\v 10 ')
                print( "\n\nESFMPreprocessing( {} {}:{}, {}, {!r} )".format( BBB, C, V, marker, originalText ) )


            def saveWord( BBB, C, V, word ):
                """
                Hook called for each completed word.

                Currently only prints a debug trace and checks that the word
                    is non-empty and contains no space.
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveWord( {}, {}:{}, {!r} )".format( BBB, C, V, word ) )
                assert word and ' ' not in word
            # end of saveWord

            def saveSemanticTag( BBB, C, V, word, tag ):
                """
                Record a semantic tag (e.g., '=PJonah') for the given word.

                Fills the semantic dictionary with keys:
                    'Tag errors': contains a list of 4-tuples (BBB,C,V,tagWithoutEqualSign)
                    'Missing': contains a dictionary keyed by tag marker then tag content,
                        each entry being a list of (BBB,C,V) or (BBB,C,V,actualWord) tuples
                    tag marker entries each containing a dictionary
                        where the key is the name (e.g., 'Jonah')
                        and the entry is a list of 4-tuples (BBB,C,V,actualWord)

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveSemanticTag( {} {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
                assert word and ' ' not in word
                assert tag and tag[0]=='=' and len(tag)>=2
                tagMarker, tagContent = tag[1], tag[2:]

                thisDict = self.containerBibleObject.semanticDict
                if tagMarker not in ESFM_SEMANTIC_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    thisDict.setdefault( 'Tag errors', [] ).append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word # A bare tag marker means the word itself is the content

                # Now look in the semantic database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
                    self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
                    missingList = thisDict.setdefault( 'Missing', {} ).setdefault( tagMarker, {} ).setdefault( tagContent, [] )
                    missingList.append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                if word==tagContent:
                    return "\\sem {} {}\\sem*".format( tagMarker, word )
                return "\\sem {} {}={}\\sem*".format( tagMarker, word, tagContent )
            # end of saveSemanticTag


            def saveStrongsTag( BBB, C, V, word, tag ):
                """
                Record a Strong's tag (e.g., '=SH430') for the given word.

                Updates the Strong's dictionary in the same way that saveSemanticTag
                    updates the semantic dictionary ('Tag errors' and 'Missing' entries),
                    except that an existing string entry is first converted to a list
                    (with the string as its first item) before the reference is appended.

                Returns a character SFM field to be inserted into the line
                    (for better compatibility with the software chain).
                """
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                    print( "ESFM saveStrongsTag( {}, {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
                assert word and ' ' not in word
                assert tag and tag[0]=='=' and tag[1]=='S' and len(tag)>=3
                tagMarker, tagContent = tag[2], tag[3:]

                thisDict = self.containerBibleObject.StrongsDict
                if tagMarker not in ESFM_STRONGS_TAGS:
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    thisDict.setdefault( 'Tag errors', [] ).append( (BBB,C,V,tag[1:]) )
                if not tagContent: tagContent = word

                # Now look in the Strongs database
                if tagMarker in thisDict \
                and tagContent in thisDict[tagMarker]:
                    thisEntry = thisDict[tagMarker][tagContent]
                    if isinstance( thisEntry, str ):
                        thisDict[tagMarker][tagContent] = [thisEntry] # Convert from a string to a list with the string as the first list item
                    thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
                else: # couldn't find it
                    loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
                    logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
                    self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
                    missingList = thisDict.setdefault( 'Missing', {} ).setdefault( tagMarker, {} ).setdefault( tagContent, [] )
                    missingList.append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )

                return "\\str {} {}={}\\str*".format( tagMarker, tagContent, word )
            # end of saveStrongsTag


            # Main code for ESFMPreprocessing
            # Analyse and collect all ESFM tags and special characters,
            #    and put the results into USFM type character fields
            bracedGroupFlag = underlineGroupFlag = startsWithUnderline = False
            word = underlineGroup = bracedGroupText = tagText = ''
            # The tag is the bit starting with =, e.g., '=PJonah'
            hangingUnderlineCount = 0 # Count of unclosed '…_ ' sequences
            lastChar = ''
            resultText = ''
            firstWordFlag = True
            for j, originalChar in enumerate( originalText ):
                char = originalChar # char may be altered below before going into resultText

                # Handle hanging underlines, e.g., 'and_ ' or ' _then' or 'and_ they_ _were_ _not _ashamed'
                if char == ' ':
                    if lastChar == '_':
                        hangingUnderlineCount += 1
                        assert hangingUnderlineCount < 3
                    if lastChar != '_' and (not underlineGroupFlag) and hangingUnderlineCount!=0:
                        underlineGroup = ''
                elif char == '_':
                    if lastChar == ' ':
                        hangingUnderlineCount -= 1
                        if hangingUnderlineCount < 0:
                            loadErrors.append( _("{} {}:{} missing first part of ESFM underline group at position {}").format( self.BBB, C, V, j ) )
                            logging.error( "ESFM underlining error at {} in {} {}:{}".format( j, BBB, C, V ) )
                            self.addPriorityError( 10, C, V, _("Missing first part of ESFM underline group") )
                            hangingUnderlineCount = 0 # recover

                # Inside braces, collect the text with spaces converted to underlines
                if bracedGroupFlag:
                    if char == '}': bracedGroupFlag = False
                    else: bracedGroupText += '_' if char==' ' else char

                # Handle formation of output string but with tagged text converted into internal SFM fields
                #     e.g., 'And_ Elohim=G=SH430 _said=SH559:'
                #   becomes 'And_ Elohim\sem G Elohim\sem*\str H 430=Elohim\str* _said\str H 559=said\str*:'
                if tagText:
                    if BibleOrgSysGlobals.strictCheckingFlag or BibleOrgSysGlobals.debugFlag: assert tagText[0] == '='
                    if char in ' _=' or char in BibleOrgSysGlobals.ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
                        # This character terminates the current tag
                        if underlineGroupFlag:
                            underlineGroup += word
                            if char == '_': underlineGroup += char
                            else: underlineGroupFlag = False
                        if len(tagText) > 1:
                            if tagText[1]=='S':
                                resultText += saveStrongsTag( BBB, C, V, underlineGroup or word, tagText )
                                underlineGroup = ''
                                underlineGroupFlag = False
                            elif bracedGroupText or word:
                                resultText += saveSemanticTag( BBB, C, V, bracedGroupText or word, tagText )
                            else: # WEB Luke 16:7 contains a footnote: \f + \ft 100 cors = about 2,110 liters or 600 bushels.\f*
                                logging.critical( "Something funny with special symbol {!r} at {} {}:{}".format( char, BBB, C, V ) )
                                if BibleOrgSysGlobals.debugFlag or debuggingThisModule: halt
                            if char == '_':
                                if not underlineGroupFlag: # it's just starting now
                                    underlineGroup += word + char
                                    underlineGroupFlag = True
                                char = ' ' # to go into resultText
                            elif char != '=': underlineGroupFlag = False
                            if char == '=': tagText = char # Started a new consecutive tag
                            else:
                                if word:
                                    saveWord( BBB, C, V, word )
                                    firstWordFlag = False
                                word = bracedGroupText = tagText = ''
                                if char!='}': resultText += char
                        else: # only have the '=' so far
                            loadErrors.append( _("{} {}:{} unexpected short ESFM tag at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
                            logging.error( "ESFM tagging error in {} {}:{}: unexpected short tag at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
                            self.addPriorityError( 21, C, V, _("Unexpected ESFM short tag") )
                    else: # still in tag
                        tagText += char
                else: # not in tag
                    if char == '=':
                        assert not tagText
                        tagText = char
                    else: # still not in tag
                        if char == '{':
                            if (lastChar and lastChar!=' ') or tagText or bracedGroupFlag or bracedGroupText:
                                loadErrors.append( _("{} {}:{} unexpected ESFM opening brace at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
                                logging.error( "ESFM tagging error in {} {}:{}: unexpected opening brace at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
                                self.addPriorityError( 20, C, V, _("Unexpected ESFM opening brace") )
                            bracedGroupFlag = True
                            char = '' # nothing to go into resultText
                        elif char in ' _' or char in BibleOrgSysGlobals.DASH_CHARS:
                            # A word separator
                            if underlineGroupFlag:
                                underlineGroup += word
                                if char == '_':
                                    underlineGroup += char
                                else: underlineGroupFlag = False
                            elif char == ' ':
                                underlineGroupFlag = False
                                if startsWithUnderline:
                                    underlineGroup += word
                                    startsWithUnderline = False
                            elif char == '_':
                                if hangingUnderlineCount > 0:
                                    pass # underlineGroupFlag will be set below
                                else: # not hanging underline
                                    underlineGroup += word + char
                                underlineGroupFlag = True
                            if word:
                                if marker == 'v' and not firstWordFlag:
                                    saveWord( BBB, C, V, word )
                                firstWordFlag = False
                            word = ''
                        elif char!='}': word += char
                        if char!='}': resultText += char
                lastChar = originalChar

            if debuggingThisModule and resultText != originalText:
                print( "from: {!r}".format( originalText ) )
                print( " got: {!r}".format( resultText ) )
            # The parentheses around the flag test matter: without them 'and' binds tighter than 'or'
            #   and strict checking mode would display every single line
            elif (BibleOrgSysGlobals.strictCheckingFlag or (BibleOrgSysGlobals.debugFlag and debuggingThisModule)) \
            and ('{' in originalText or '}' in originalText or '=' in originalText):
                print( "original:", repr(originalText) )
                print( "returned:", repr(resultText) )

            return resultText
        # end of ESFMBibleBook.ESFMPreprocessing


        def doaddLine( originalMarker, originalText ):
            """
            Check for newLine markers within the line (if so, break the line)
                and save the information in our database.

            Also checks for matching underlines.

            Also convert ~ to a proper non-break space.

            Uses C and V from the enclosing load() for its error messages.
            """
            text = originalText.replace( '~', '\u00A0' ) # Written as an escape so the non-break space is visible in the source
            marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( originalMarker )
            if marker != originalMarker:
                loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                logging.error( _("ESFM doesn't allow the unnumbered marker after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
                self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

            if '\\' in text: # Check markers inside the lines
                markerList = BibleOrgSysGlobals.USFMMarkers.getMarkerListFromText( text )
                ix = 0 # Index into text of the start of the part not yet saved
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList: # check paragraph markers
                    if insideMarker == '\\': # it's a free-standing backspace
                        loadErrors.append( _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}").format( self.BBB, C, V, marker, text ) )
                        logging.error( _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                        self.addPriorityError( 100, C, V, _("Improper free-standing backspace character inside a line") )
                    elif BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(insideMarker): # Need to split the line for everything else to work properly
                        if ix==0:
                            loadErrors.append( _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}").format( self.BBB, C, V, insideMarker, marker, text ) )
                            logging.error( _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}").format( insideMarker, self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                            self.addPriorityError( 96, C, V, _("NewLine marker \\{} shouldn't be inside a line").format( insideMarker ) )
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine( marker, thisText )
                        ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar) # Get the start of the next text -- the 1 is for the backslash
                        marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( insideMarker ) # setup for the next line
                        if marker != insideMarker:
                            loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker within line \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            logging.error( _("ESFM doesn't allow the unnumbered marker within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
                            self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )

                if ix != 0: # We must have separated multiple lines
                    text = text[ix:] # Get the final bit of the line

            if '_' in text:
                # Should this code be somewhere more general, e.g., in InternalBibleBook.py ???
                leftCount, rightCount = text.count( '_ ' ), text.count( ' _' )
                if leftCount > rightCount:
                    loadErrors.append( _("{} {}:{} Too many '_ ' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Too many '_ ' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                elif leftCount < rightCount:
                    loadErrors.append( _("{} {}:{} Too many ' _' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Too many ' _' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )

            self.addLine( marker, text ) # Call the function in the base class to save the line (or the remainder of the line if we split it above)
        # end of ESFMBibleBook.doaddLine


        # Main code for ESFMBibleBook.load
        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  " + _("Loading {}…").format( filename ) )
        self.sourceFilename = filename
        self.sourceFolder = folder
        self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
        originalBook = ESFMFile()
        originalBook.read( self.sourceFilepath )

        # Do some important cleaning up before we save the data
        C, V = '-1', '-1' # So first/id line starts at -1:0
        lastMarker = lastText = ''
        loadErrors = []
        for marker,originalText in originalBook.lines: # Always process a line behind in case we have to combine lines

            # Keep track of where we are for more helpful error messages
            if marker=='c' and originalText: C, V = originalText.split()[0], '0'
            elif marker=='v' and originalText:
                V = originalText.split()[0]
                if C == '-1': C = '1' # Some single chapter books don't have an explicit chapter 1 marker
            elif C == '-1' and marker!='intro': V = str( int(V) + 1 )
            # NOTE(review): because of the elif ordering, 'restore' lines are only skipped once C is no longer '-1' -- confirm this is intended
            elif marker=='restore': continue # Ignore these lines completely

            # Now load the actual Bible book data
            if marker in USFMMarkers.OFTEN_IGNORED_USFM_HEADER_MARKERS:
                text = originalText
            else:
                text = ESFMPreprocessing( self.BBB, C, V, marker, originalText ) # Convert ESFM encoding to pseudo-USFM
            if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( marker ):
                if lastMarker: doaddLine( lastMarker, lastText )
                lastMarker, lastText = marker, text
            elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker ) \
            or (marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker[:-1] )): # the line begins with an internal marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 27, C, V, _("Found \\{} internal marker on new line in file").format( marker ) )
                if not lastText.endswith(' '): lastText += ' ' # Not always good to add a space, but it's their fault!
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            elif BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker ) \
            or (marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker[:-1] )): # the line begins with a note marker -- append it to the previous line
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 26, C, V, _("Found \\{} note marker on new line in file").format( marker ) )
                if not lastText.endswith(' ') and marker!='f': lastText += ' ' # Not always good to add a space, but it's their fault! Don't do it for footnotes, though.
                lastText +=  '\\' + marker + ' ' + text
                if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
            else: # the line begins with an unknown marker (ESFM doesn't allow custom markers)
                if text:
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
                else: # no text
                    loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
                    logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
                self.addPriorityError( 100, C, V, _("Found \\{} unknown marker on new line in file").format( marker ) )
                for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
                    if marker.startswith( tryMarker ): # Let's try changing it
                        if lastMarker: doaddLine( lastMarker, lastText )
                        lastMarker, lastText = tryMarker, marker[len(tryMarker):] + ' ' + text
                        loadErrors.append( _("{} {}:{} Changed '\\{}' unknown marker to {!r} at beginning of line: {}").format( self.BBB, C, V, marker, tryMarker, text ) )
                        logging.warning( _("Changed '\\{}' unknown marker to {!r} after {} {}:{} at beginning of line: {}").format( marker, tryMarker, self.BBB, C, V, text ) )
                        break
                # Otherwise, don't bother processing this line -- it'll just cause more problems later on

        if not originalBook.lines: # There were no lines!!!
            loadErrors.append( _("{} This ESFM file was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            logging.error( _("ESFM file for {} was totally empty: {}").format( self.BBB, self.sourceFilename ) )
            # This must be set BEFORE the final doaddLine below, otherwise the placeholder never gets saved
            lastMarker, lastText = 'rem', 'This (ESFM) file was completely empty' # Save something since we had a file at least

        if lastMarker: doaddLine( lastMarker, lastText ) # Process the final line (or the empty-file placeholder)

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        if 0 and BibleOrgSysGlobals.debugFlag and self.BBB=='JNA': # Disabled debugging dump of the tag dictionaries
            for name,thisDict in  ( ('SEM',self.containerBibleObject.semanticDict), ('STR',self.containerBibleObject.StrongsDict) ):
                if 'Tag errors' in thisDict:
                    print( "\n{} Tag errors: {}".format( name, thisDict['Tag errors'] ) )
                if 'Missing' in thisDict:
                    print( "\n{} Missing: {}".format( name, thisDict['Missing'] ) )
                if thisDict == self.containerBibleObject.semanticDict:
                    for tag in ESFM_SEMANTIC_TAGS:
                        if tag in thisDict:
                            print( "\n{} Found {}: {}".format( name, tag, thisDict[tag] ) )
                elif thisDict == self.containerBibleObject.StrongsDict:
                    for tag in ESFM_STRONGS_TAGS:
                        for num in thisDict[tag]:
                            if isinstance( thisDict[tag][num], list ):
                                print( "\n{} Found {} {}: {}".format( name, tag, num, thisDict[tag][num] ) )
            halt