Ejemplo n.º 1
0
    def getMaximumPossibleFilenameTuples( self, strictCheck=False ):
        """
        Try each of the filename-discovery methods and keep whichever result list is longest.

        The confirmed list is the starting point; the external list and then the internal list
            only replace it if they are strictly longer.

        If strictCheck (or the global strictCheckingFlag) is set, each listed file is peeked into
            and is removed from the chosen list unless its first line (after any Unicode BOM)
            starts with a backslash. A first line of None is treated as an undecodable file.

        The chosen list is also saved in self.lastTupleList.

        Returns a list of 2-tuples containing ( BBB, filename ) not including the folder path.
            No sorting is done here -- the called methods are expected to give sorted results.
        """
        bestLabel, bestList = 'Confirmed', self.getConfirmedFilenameTuples()
        for label, getterName in ( ('External','getPossibleFilenameTuplesExt'), ('Internal','getPossibleFilenameTuplesInt') ):
            candidateList = getattr( self, getterName )()
            if len(candidateList) > len(bestList): # ties keep the earlier choice
                bestLabel, bestList = label, candidateList
        vPrint( 'Info', debuggingThisModule, f"getMaximumPossibleFilenameTuples: using {bestLabel} ({len(bestList)})" )

        if strictCheck or BibleOrgSysGlobals.strictCheckingFlag:
            # Walk a snapshot because rejected entries get removed from the live list
            for BBB,filename in bestList.copy():
                firstLine = BibleOrgSysGlobals.peekIntoFile( filename, self.givenFolderName )
                if firstLine is None: # seems we couldn't decode the file
                    keepIt = False
                else:
                    if firstLine and firstLine[0] == '\ufeff': # U+FEFF Unicode Byte Order Marker (BOM)
                        logging.info( "USFMBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                        firstLine = firstLine[1:] # Drop the BOM before looking at the content
                    # A blank first line is not allowed, and the line must start with a backslash
                    keepIt = bool( firstLine ) and firstLine[0] == '\\'
                if not keepIt:
                    bestList.remove( (BBB,filename) )

        self.lastTupleList = bestList
        return bestList
Ejemplo n.º 2
0
    def getConfirmedFilenameTuples(self, strictCheck: bool = False):
        """
        Return a list of ( BBB, filename ) tuples for derived filenames that are actually readable.

        Each filename from self.getDerivedFilenameTuples() is joined onto self.givenFolderName
            and kept only if os.access() reports it as readable.

        If strictCheck (or the global strictCheckingFlag) is set, the first three lines of
            each file are also peeked at:
                * files giving fewer than three lines are skipped
                * a first line that doesn't start with an XML 1.0 declaration
                    (with or without a leading BOM) is reported but NOT rejected
                * files without '<usx' in either of the first two lines are skipped.

        The result keeps the order given by getDerivedFilenameTuples() (no sorting is done here).
            Each tuple contains ( BBB, filename ) not including the folder path.
        """
        xmlDeclarationStarts = (
            '<?xml version="1.0"',
            "<?xml version='1.0'",
            '\ufeff<?xml version="1.0"',  # same but with BOM
            "\ufeff<?xml version='1.0'",
        )
        doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

        resultList = []
        for BBB, possibleFilename in self.getDerivedFilenameTuples():
            possibleFilepath = os.path.join(self.givenFolderName,
                                            possibleFilename)
            if not os.access(possibleFilepath, os.R_OK):
                continue  # not present or not readable
            if doStrictCheck:
                firstLines = BibleOrgSysGlobals.peekIntoFile(
                    possibleFilename, self.givenFolderName, numLines=3)
                if not firstLines or len(firstLines) < 3:
                    continue
                if not firstLines[0].startswith(xmlDeclarationStarts):
                    # Only reported -- the '<usx' test below decides acceptance.
                    # (This used to reference an undefined name and so raised NameError.)
                    vPrint(
                        'Verbose', debuggingThisModule,
                        "USXB (unexpected) first line was {!r} in {}".format(
                            firstLines, possibleFilename))
                if '<usx' not in firstLines[0] and '<usx' not in firstLines[1]:
                    continue  # so it doesn't get added
            resultList.append((BBB, possibleFilename))
        return resultList
Ejemplo n.º 3
0
    def getPossibleFilenameTuples(self,
                                  strictCheck: bool = False
                                  ) -> List[Tuple[str, str]]:
        """
        Return a list of filenames just derived from the list of files in the folder,
                i.e., look only externally at the filenames.

        A file from self.fileList is considered for every USFM book code (from
            self._USFMBooksCodeNumberTriples) that occurs in its UPPER CASE name
            (ignoring the extension, and ignoring the first three characters
            when self.pattern is 'dddBBB').
        Files named in filenamesToIgnore, ending with one of filenameEndingsToIgnore,
            ending with '~', or having an extension in extensionsToIgnore are skipped.

        If strictCheck (or the global strictCheckingFlag) is set, the first three lines of
            each matching file are also peeked at:
                * files giving fewer than three lines are skipped
                * a first line that doesn't start with an XML 1.0 declaration
                    (with or without a leading BOM) is reported but NOT rejected
                * files without '<usx' in either of the first two lines are skipped.

        The collected (unsorted) list is saved in self.lastTupleList.
        Returns the result of passing that list through loadedBibleBooksCodes.getSequenceList().
        """
        fnPrint(debuggingThisModule,
                f"USXFilenames.getPossibleFilenameTuples( {strictCheck} )")

        xmlDeclarationStarts = (
            '<?xml version="1.0"',
            "<?xml version='1.0'",
            '\ufeff<?xml version="1.0"',  # same but with BOM
            "\ufeff<?xml version='1.0'",
        )
        doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

        resultList = []
        for possibleFilename in self.fileList:
            pFUpper = possibleFilename.upper()
            if pFUpper in filenamesToIgnore: continue
            # These filters depend only on the filename, so apply them once per file
            #   (rather than once per book code)
            if any(pFUpper.endswith(ending) for ending in filenameEndingsToIgnore):
                continue
            pFUpperProper, pFUpperExt = os.path.splitext(pFUpper)
            if pFUpper.endswith('~') or pFUpperExt[1:] in extensionsToIgnore:  # Compare without the first dot
                continue

            # Otherwise 051COL.usx gets confused between 1Co and Col
            checkString = pFUpperProper[3:] if self.pattern == 'dddBBB' else pFUpperProper
            for USFMBookCode, _USFMDigits, _BBB in self._USFMBooksCodeNumberTriples:
                if USFMBookCode.upper() not in checkString:
                    continue
                if doStrictCheck:
                    firstLines = BibleOrgSysGlobals.peekIntoFile(
                        possibleFilename,
                        self.givenFolderName,
                        numLines=3)
                    if not firstLines or len(firstLines) < 3:
                        continue
                    if not firstLines[0].startswith(xmlDeclarationStarts):
                        # Only reported -- the '<usx' test below decides acceptance.
                        # (This used to reference an undefined name and so raised NameError.)
                        vPrint(
                            'Verbose', debuggingThisModule,
                            "USXB (unexpected) first line was {!r} in {}"
                            .format(firstLines, possibleFilename))
                    if '<usx' not in firstLines[0] and '<usx' not in firstLines[1]:
                        continue  # so it doesn't get added
                self.doListAppend(
                    BibleOrgSysGlobals.loadedBibleBooksCodes.
                    getBBBFromUSFMAbbreviation(USFMBookCode),
                    possibleFilename, resultList,
                    "getPossibleFilenameTuplesExt")
        self.lastTupleList = resultList
        return BibleOrgSysGlobals.loadedBibleBooksCodes.getSequenceList(
            resultList)
Ejemplo n.º 4
0
def UnboundBibleFileCheck(givenFolderName,
                          strictCheck: bool = True,
                          autoLoad: bool = False,
                          autoLoadBooks: bool = False):
    """
    Given a folder, search for Unbound Bible files in the folder and in the next level down.

    A candidate is any (non-ignored) file whose name ends with '_utf8.txt'.
    If strictCheck (or the global strictCheckingFlag) is set, the first line of
        the file must also be exactly the Unbound Bible header line.

    The subfolders are only searched if nothing was found in the given folder itself.

    Returns False if the given folder is unreadable or is not a folder.

    Returns None if nothing was found.

    If exactly one Unbound Bible is found and either autoLoad or autoLoadBooks is set,
        returns an UnboundBible object (with load() called only if autoLoadBooks is set).

    Otherwise returns the number of Bibles found.
    """
    vPrint(
        'Info', debuggingThisModule,
        "UnboundBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("UnboundBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("UnboundBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    filenameSuffix = '_utf8.txt'
    expectedFirstLine = "#THE UNBOUND BIBLE (www.unboundbible.org)"
    doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    # Find all the files and folders in this folder
    vPrint(
        'Verbose', debuggingThisModule,
        " UnboundBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath):
            if something in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                continue  # don't visit these directories
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            somethingUpper = something.upper()
            if any(somethingUpper.endswith(ending)
                   for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                foundFiles.append(something)

    # See if there's an UnboundBible project here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if thisFilename in ('book_names.txt', 'Readme.txt'):
            looksHopeful = True
        elif thisFilename.endswith(filenameSuffix):
            if doStrictCheck:
                firstLine = BibleOrgSysGlobals.peekIntoFile(
                    thisFilename, givenFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if firstLine != expectedFirstLine:
                    vPrint(
                        'Verbose', debuggingThisModule,
                        "UnB (unexpected) first line was {!r} in {}".format(
                            firstLine, thisFilename))
                    continue
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        vPrint('Info', debuggingThisModule, "UnboundBibleFileCheck got",
               numFound, givenFolderName, lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            # The UnboundBible is given the filename without the "_utf8.txt" ending
            uB = UnboundBible(givenFolderName,
                              lastFilenameFound[:-len(filenameSuffix)])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        vPrint('Quiet', debuggingThisModule,
               "    Looked hopeful but no actual files found")

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("UnboundBibleFileCheck: {!r} subfolder is unreadable").
                format(tryFolderName))
            continue
        vPrint(
            'Verbose', debuggingThisModule,
            "    UnboundBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        foundSubfiles = []
        try:
            for something in os.listdir(tryFolderName):
                somepath = os.path.join(givenFolderName, thisFolderName,
                                        something)
                if not os.path.isfile(somepath):
                    continue  # we don't go any deeper than this level
                somethingUpper = something.upper()
                if any(somethingUpper.endswith(ending)
                       for ending in filenameEndingsToIgnore):
                    continue
                somethingUpperExt = os.path.splitext(somethingUpper)[1]
                if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                    foundSubfiles.append(something)
        except PermissionError:
            pass  # can't read folder, e.g., system folder

        # See if there's an UB project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if not thisFilename.endswith(filenameSuffix):
                continue
            if doStrictCheck:
                firstLine = BibleOrgSysGlobals.peekIntoFile(
                    thisFilename, tryFolderName)
                if firstLine is None:
                    continue  # seems we couldn't decode the file
                if firstLine != expectedFirstLine:
                    vPrint(
                        'Verbose', debuggingThisModule,
                        "UnB (unexpected) first line was {!r} in {}".format(
                            firstLine, thisFilename))
                    # NOTE: `halt` is not defined in the visible code (presumably a
                    #   deliberate debugging stop), so it must only be reached when
                    #   debugging this module -- otherwise any ordinary *_utf8.txt
                    #   file in a subfolder aborts the search with a NameError.
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue
            foundProjects.append((tryFolderName, thisFilename))
    numFound = len(foundProjects)
    if numFound:
        vPrint('Info', debuggingThisModule,
               "UnboundBibleFileCheck foundProjects", numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            foundFolderName, foundFilename = foundProjects[0]
            # The UnboundBible is given the filename without the "_utf8.txt" ending
            uB = UnboundBible(foundFolderName,
                              foundFilename[:-len(filenameSuffix)])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound
Ejemplo n.º 5
0
def VerseViewXMLBibleFileCheck(givenFolderName,
                               strictCheck: bool = True,
                               autoLoad: bool = False,
                               autoLoadBooks: bool = False):
    """
    Given a folder, search for VerseView XML Bible files in the folder and in the next level down.

    Without strict checking, every (non-ignored) file counts as a candidate.
    If strictCheck (or the global strictCheckingFlag) is set, the first three lines
        of a file are peeked at and the file is only accepted if:
            * the first line starts with an XML 1.0 declaration (with or without a leading BOM)
            * the second line contains '<bible>'
            * the third line contains '<fname>'.

    The subfolders are only searched if nothing was found in the given folder itself.

    Returns False if the given folder is unreadable or is not a folder.

    Returns None if nothing was found.

    If exactly one VerseView Bible is found and either autoLoad or autoLoadBooks is set,
        returns a VerseViewXMLBible object (with load() called only if autoLoadBooks is set).

    Otherwise returns the number found.
    """
    vPrint(
        'Info', debuggingThisModule,
        "VerseViewXMLBibleFileCheck( {}, {}, {}, {} )".format(
            givenFolderName, strictCheck, autoLoad, autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, str)
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("VerseViewXMLBibleFileCheck: Given {!r} folder is unreadable").
            format(givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("VerseViewXMLBibleFileCheck: Given {!r} path is not a folder").
            format(givenFolderName))
        return False

    xmlDeclarationStarts = (
        '<?xml version="1.0"',
        "<?xml version='1.0'",
        '\ufeff<?xml version="1.0"',  # same but with BOM
        "\ufeff<?xml version='1.0'",
    )
    doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    # Find all the files and folders in this folder
    vPrint(
        'Verbose', debuggingThisModule,
        " VerseViewXMLBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath):
            if something in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                continue  # don't visit these directories
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            somethingUpper = something.upper()
            if any(somethingUpper.endswith(ending)
                   for ending in filenameEndingsToIgnore):
                continue
            somethingUpperExt = os.path.splitext(somethingUpper)[1]
            if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                foundFiles.append(something)

    # See if there's a VerseView project here in this folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if doStrictCheck:
            firstLines = BibleOrgSysGlobals.peekIntoFile(thisFilename,
                                                         givenFolderName,
                                                         numLines=3)
            if not firstLines or len(firstLines) < 3: continue
            if not firstLines[0].startswith(xmlDeclarationStarts):
                vPrint(
                    'Verbose', debuggingThisModule,
                    "VVB (unexpected) first line was {!r} in {}".format(
                        firstLines, thisFilename))
                continue
            if '<bible>' not in firstLines[1]: continue
            if '<fname>' not in firstLines[2]: continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        vPrint('Info', debuggingThisModule, "VerseViewXMLBibleFileCheck got",
               numFound, givenFolderName, lastFilenameFound)
        # Either flag requests an object back (this test used `and`, so that
        #   autoLoad alone returned a count, unlike the subfolder case below)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            ub = VerseViewXMLBible(givenFolderName, lastFilenameFound)
            if autoLoadBooks: ub.load()  # Load and process the file
            return ub
        return numFound

    # Look one level down
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        vPrint(
            'Verbose', debuggingThisModule,
            "    VerseViewXMLBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        foundSubfiles = []
        try:
            for something in os.listdir(tryFolderName):
                somepath = os.path.join(givenFolderName, thisFolderName,
                                        something)
                if not os.path.isfile(somepath):
                    continue  # we don't go any deeper than this level
                somethingUpper = something.upper()
                if any(somethingUpper.endswith(ending)
                       for ending in filenameEndingsToIgnore):
                    continue
                somethingUpperExt = os.path.splitext(somethingUpper)[1]
                if somethingUpperExt[1:] not in extensionsToIgnore:  # Compare without the first dot
                    foundSubfiles.append(something)
        except PermissionError:
            pass  # can't read folder, e.g., system folder

        # See if there's a VerseView project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if doStrictCheck:
                firstLines = BibleOrgSysGlobals.peekIntoFile(thisFilename,
                                                             tryFolderName,
                                                             numLines=3)
                if not firstLines or len(firstLines) < 3: continue
                if not firstLines[0].startswith(xmlDeclarationStarts):
                    vPrint(
                        'Verbose', debuggingThisModule,
                        "VVB (unexpected) first line was {!r} in {}".format(
                            firstLines, thisFilename))
                    continue
                if '<bible>' not in firstLines[1]: continue
                if '<fname>' not in firstLines[2]: continue
            foundProjects.append((tryFolderName, thisFilename))
    numFound = len(foundProjects)
    if numFound:
        vPrint('Info', debuggingThisModule,
               "VerseViewXMLBibleFileCheck foundProjects", numFound,
               foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            foundFolderName, foundFilename = foundProjects[0]
            ub = VerseViewXMLBible(foundFolderName, foundFilename)
            if autoLoadBooks: ub.load()  # Load and process the file
            return ub
        return numFound
Ejemplo n.º 6
0
def VPLBibleFileCheck( givenFolderName, strictCheck:bool=True, autoLoad:bool=False, autoLoadBooks:bool=False ):
    """
    Given a folder, search for VPL Bible files in the folder and in the next level down.

    A candidate is any (non-ignored) file whose name ends with '.txt'
        (other than 'book_names.txt' and 'Readme.txt' in the given folder itself).
    If strictCheck (or the global strictCheckingFlag) is set, the first line of the file
        (after any Unicode BOM) must also match one of the known VPL line patterns.

    The subfolders are only searched if nothing was found in the given folder itself.

    Returns False if the given folder is unreadable or is not a folder.

    Returns None if nothing was found.

    If exactly one VPL Bible is found and either autoLoad or autoLoadBooks is set,
        returns a VPLBible object (with load() called only if autoLoadBooks is set).

    Otherwise returns the number of Bibles found.
    """
    # First-line patterns, tried in this order -- the (1-based) position is the VPL type number
    vplFirstLinePatterns = (
        r'^(\w{2,5}?)\s(\d{1,3})[:\.](\d{1,3})\s',
        r'^(\d{8})\s',
        r'^# language_name:\s',
        # NOTE: '; TITLE:' files are now handled in a separate module ForgeForSwordSearcherBible.py
        )

    def isWantedFilename( filename ):
        """ Return False for filenames with an ignored ending or an ignored extension. """
        filenameUpper = filename.upper()
        if any( filenameUpper.endswith( ending ) for ending in filenameEndingsToIgnore ):
            return False
        return os.path.splitext( filenameUpper )[1][1:] not in extensionsToIgnore # Compare without the first dot

    def firstLineLooksLikeVPL( filename, folderName, debugHalt=False ):
        """
        Peek at the first line of the file and return True if it matches a VPL pattern.

        If debugHalt is set, a non-matching line also hits the `halt` debugging stop
            (but only when both debugFlag and debuggingThisModule are set).
        """
        firstLine = BibleOrgSysGlobals.peekIntoFile( filename, folderName )
        if firstLine is None: return False # seems we couldn't decode the file
        if firstLine and firstLine[0]==chr(65279): #U+FEFF or \ufeff
            logging.info( "VPLBibleFileCheck: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
            firstLine = firstLine[1:] # Remove the Unicode Byte Order Marker (BOM)
        # Try to identify the VPL type
        match = None
        for vplType, pattern in enumerate( vplFirstLinePatterns, start=1 ):
            match = re.search( pattern, firstLine )
            if match: break
        if not match:
            vPrint( 'Verbose', debuggingThisModule, "VPLBibleFileCheck: (unexpected) first line was {!r} in {}".format( firstLine, filename ) )
            if debugHalt and BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
            return False
        if BibleOrgSysGlobals.debugFlag:
            vPrint( 'Quiet', debuggingThisModule, "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), firstLine ) )
        return True

    vPrint( 'Info', debuggingThisModule, "VPLBibleFileCheck( {}, {}, {}, {} )".format( givenFolderName, strictCheck, autoLoad, autoLoadBooks ) )
    if BibleOrgSysGlobals.debugFlag: assert givenFolderName and isinstance( givenFolderName, str )
    if BibleOrgSysGlobals.debugFlag: assert autoLoad in (True,False,)

    # The given folder must be readable and must actually be a folder
    if not os.access( givenFolderName, os.R_OK ):
        logging.critical( _("VPLBibleFileCheck: Given {} folder is unreadable").format( repr(givenFolderName) ) )
        return False
    if not os.path.isdir( givenFolderName ):
        logging.critical( _("VPLBibleFileCheck: Given {} path is not a folder").format( repr(givenFolderName) ) )
        return False

    # Sort the folder entries into subfolders to visit later and files to look at now
    vPrint( 'Verbose', debuggingThisModule, " VPLBibleFileCheck: Looking for files in given {}".format( repr(givenFolderName) ) )
    foundFolders, foundFiles = [], []
    for entryName in os.listdir( givenFolderName ):
        entryPath = os.path.join( givenFolderName, entryName )
        if os.path.isdir( entryPath ):
            if entryName not in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS: # don't visit those directories
                foundFolders.append( entryName )
        elif os.path.isfile( entryPath ):
            if isWantedFilename( entryName ):
                foundFiles.append( entryName )

    # See if there's a VPLBible project here in this given folder
    numFound = 0
    looksHopeful = False
    lastFilenameFound = None
    for thisFilename in sorted( foundFiles ):
        if thisFilename in ('book_names.txt','Readme.txt' ):
            looksHopeful = True
            continue
        if not thisFilename.endswith( '.txt' ):
            continue
        if (strictCheck or BibleOrgSysGlobals.strictCheckingFlag) \
        and not firstLineLooksLikeVPL( thisFilename, givenFolderName ):
            continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        vPrint( 'Info', debuggingThisModule, "VPLBibleFileCheck got", numFound, givenFolderName, lastFilenameFound )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            uB = VPLBible( givenFolderName, lastFilenameFound[:-4] ) # Remove the end of the actual filename ".txt"
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
    elif looksHopeful and BibleOrgSysGlobals.verbosityLevel > 2:
        vPrint( 'Quiet', debuggingThisModule, "    Looked hopeful but no actual files found" )

    # Nothing in the given folder, so look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted( foundFolders ):
        tryFolderName = os.path.join( givenFolderName, thisFolderName+'/' )
        if not os.access( tryFolderName, os.R_OK ): # The subfolder is not readable
            logging.warning( _("VPLBibleFileCheck: {!r} subfolder is unreadable").format( tryFolderName ) )
            continue
        vPrint( 'Verbose', debuggingThisModule, "    VPLBibleFileCheck: Looking for files in {}".format( tryFolderName ) )
        foundSubfolders, foundSubfiles = [], []
        try:
            for entryName in os.listdir( tryFolderName ):
                entryPath = os.path.join( givenFolderName, thisFolderName, entryName )
                if os.path.isdir( entryPath ):
                    foundSubfolders.append( entryName )
                elif os.path.isfile( entryPath ):
                    if isWantedFilename( entryName ):
                        foundSubfiles.append( entryName )
        except PermissionError:
            pass # can't read folder, e.g., system folder

        # See if there's a VPLBible here in this subfolder
        for thisFilename in sorted( foundSubfiles ):
            if not thisFilename.endswith( '.txt' ):
                continue
            if (strictCheck or BibleOrgSysGlobals.strictCheckingFlag) \
            and not firstLineLooksLikeVPL( thisFilename, tryFolderName, debugHalt=True ):
                continue
            foundProjects.append( (tryFolderName, thisFilename,) )
            lastFilenameFound = thisFilename
            numFound += 1
    if numFound:
        vPrint( 'Info', debuggingThisModule, "VPLBibleFileCheck foundProjects", numFound, foundProjects )
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            uB = VPLBible( foundProjects[0][0], foundProjects[0][1][:-4] ) # Remove the end of the actual filename ".txt"
            if autoLoadBooks: uB.load() # Load and process the file
            return uB
        return numFound
Ejemplo n.º 7
0
def YETBibleFileCheck(givenFolderName,
                      strictCheck: bool = True,
                      autoLoad: bool = False,
                      autoLoadBooks: bool = False):
    """
    Given a folder, search for YET Bible files in the folder and,
        if none are found there, in each folder one level down.

    A candidate is a file whose upper-cased extension is in
        filenameEndingsToAccept and whose name ends with '.yet'.
    If strictCheck (or BibleOrgSysGlobals.strictCheckingFlag) is set,
        the first line of each candidate must also start with "info\\t".

    Returns False if the given folder is unreadable or is not a folder.

    if neither autoLoad nor autoLoadBooks is set (default)
        returns None (nothing found), or the number of Bibles found.

    if autoLoad or autoLoadBooks is set and exactly one YET Bible is found,
        returns the YETBible object (loaded if autoLoadBooks is set).
    """
    fnPrint(
        debuggingThisModule,
        "YETBibleFileCheck( {}, {}, {}, {} )".format(givenFolderName,
                                                     strictCheck, autoLoad,
                                                     autoLoadBooks))
    if BibleOrgSysGlobals.debugFlag:
        assert givenFolderName and isinstance(givenFolderName, (str, Path))
        assert autoLoad in (True, False)

    # Check that the given folder is readable
    if not os.access(givenFolderName, os.R_OK):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} folder is unreadable").format(
                givenFolderName))
        return False
    if not os.path.isdir(givenFolderName):
        logging.critical(
            _("YETBibleFileCheck: Given {!r} path is not a folder").format(
                givenFolderName))
        return False

    doStrictCheck = strictCheck or BibleOrgSysGlobals.strictCheckingFlag

    def hasExpectedFirstLine(filename, folderName) -> bool:
        """
        Peek into the file and report whether its first line starts with "info\\t".
        """
        firstLine = BibleOrgSysGlobals.peekIntoFile(filename, folderName)
        # peekIntoFile can give None (other callers treat that as an undecodable file),
        #   so test for it before calling a str method on the result
        if firstLine is not None and firstLine.startswith("info\t"):
            return True
        vPrint(
            'Verbose', debuggingThisModule,
            "YETBible (unexpected) first line was {!r} in {}".format(
                firstLine, filename))
        return False

    # Find all the files and folders in this folder
    vPrint(
        'Verbose', debuggingThisModule,
        " YETBibleFileCheck: Looking for files in given {}".format(
            givenFolderName))
    foundFolders, foundFiles = [], []
    for something in os.listdir(givenFolderName):
        somepath = os.path.join(givenFolderName, something)
        if os.path.isdir(somepath):
            if something in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                continue  # don't visit these directories
            foundFolders.append(something)
        elif os.path.isfile(somepath):
            # The extension is compared in upper case against the accepted endings
            if os.path.splitext(something.upper())[1] in filenameEndingsToAccept:
                foundFiles.append(something)

    # See if there's an YETBible project here in this given folder
    numFound = 0
    lastFilenameFound = None
    for thisFilename in sorted(foundFiles):
        if not thisFilename.endswith('.yet'):
            continue
        if doStrictCheck and not hasExpectedFirstLine(thisFilename,
                                                      givenFolderName):
            continue
        lastFilenameFound = thisFilename
        numFound += 1
    if numFound:
        vPrint('Info', debuggingThisModule, "YETBibleFileCheck got", numFound,
               givenFolderName, lastFilenameFound)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            # Remove the end of the actual filename ".yet"
            uB = YETBible(givenFolderName, lastFilenameFound[:-4])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound

    # Look one level down
    numFound = 0
    foundProjects = []
    for thisFolderName in sorted(foundFolders):
        tryFolderName = os.path.join(givenFolderName, thisFolderName + '/')
        if not os.access(tryFolderName,
                         os.R_OK):  # The subfolder is not readable
            logging.warning(
                _("YETBibleFileCheck: {!r} subfolder is unreadable").format(
                    tryFolderName))
            continue
        vPrint(
            'Verbose', debuggingThisModule,
            "    YETBibleFileCheck: Looking for files in {}".format(
                tryFolderName))
        foundSubfiles = []
        try:
            for something in os.listdir(tryFolderName):
                somepath = os.path.join(givenFolderName, thisFolderName,
                                        something)
                if os.path.isfile(somepath):
                    if os.path.splitext(
                            something.upper())[1] in filenameEndingsToAccept:
                        foundSubfiles.append(something)
        except PermissionError:
            pass  # can't read folder, e.g., system folder

        # See if there's an YETBible project here in this folder
        for thisFilename in sorted(foundSubfiles):
            if not thisFilename.endswith('.yet'):
                continue
            if doStrictCheck and not hasExpectedFirstLine(thisFilename,
                                                          tryFolderName):
                # 'halt' is an undefined name used to stop deliberately;
                #   only do that when debugging this module, otherwise skip the file
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                continue
            foundProjects.append((tryFolderName, thisFilename))
            numFound += 1
    if numFound:
        vPrint('Info', debuggingThisModule, "YETBibleFileCheck foundProjects",
               numFound, foundProjects)
        if numFound == 1 and (autoLoad or autoLoadBooks):
            if BibleOrgSysGlobals.debugFlag: assert len(foundProjects) == 1
            foundFolderName, foundFilename = foundProjects[0]
            # Remove the end of the actual filename ".yet"
            uB = YETBible(foundFolderName, foundFilename[:-4])
            if autoLoadBooks: uB.load()  # Load and process the file
            return uB
        return numFound