コード例 #1
0
    def __init__(self,
                 parameterOne,
                 resourcesObject=None,
                 downloadAllBooks=False):
        """
        Create the DCS (Door43 Content Service) Bible object.

        parameterOne can be:
            a catalog dictionary entry (and second parameter must be None)
        or
            an index into the BibleList in the resourcesObject passed as the second parameter

        resourcesObject is only used when parameterOne is an index:
            its getBibleResourceDict() is called to fetch the dictionary entry.

        downloadAllBooks selects the loading strategy:
            True:  download and unzip the whole repo (unless the local copy is
                    newer than the entry's 'updated_at' timestamp) and then
                    initialise the USFMBible from the unzipped folder.
            False: only make sure the (possibly empty) local folder exists and
                    initialise the USFMBible from it.

        The dictionary entry must contain 'html_url', 'full_name' and 'name',
            plus 'updated_at' if a previously downloaded folder exists.

        NOTE: If the download fails with a URLError, the error is logged and
            this function returns early, i.e., BEFORE USFMBible.__init__ has
            been called, so the object is left only partly initialised.

        Raises ValueError if the server answers with an unexpected content type.
        """
        # Imported locally because only the datetime class itself is known to be in scope here
        from datetime import timezone

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(
                f"DCSBible.__init__( {parameterOne}, {resourcesObject}, {downloadAllBooks} )…"
            )

        if isinstance(parameterOne, dict):
            assert resourcesObject is None
            resourceDict = parameterOne
        else:
            assert isinstance(parameterOne, int)
            assert resourcesObject  # why ??? and isinstance( resourcesObject, Door43CatalogResources )
            resourceDict = resourcesObject.getBibleResourceDict(parameterOne)
        assert resourceDict and isinstance(resourceDict, dict)

        self.baseURL = resourceDict['html_url']
        # The repo's full name contains a slash, which can't be used inside a folder name
        adjustedRepoName = resourceDict['full_name'].replace('/', '--')
        # NOTE(review): This "safe" name is NOT what is used to build the folder path below
        #   (and it's overwritten further down when downloading all books).
        #   It's left as is, because changing the path would orphan existing downloads -- confirm intent.
        desiredFolderName = BibleOrgSysGlobals.makeSafeFilename(
            adjustedRepoName)
        unzippedFolderPath = os.path.join(
            BibleOrgSysGlobals.DOWNLOADED_RESOURCES_FOLDER,
            'Door43ContentServiceOnline/', f"{adjustedRepoName}/")

        if downloadAllBooks:
            # See if files already exist and are current (so don't download again)
            alreadyDownloadedFlag = False
            if os.path.isdir(unzippedFolderPath):
                # The trailing 'Z' means that the timestamp is UTC,
                #   so both sides of the comparison are made timezone-aware UTC
                #   (comparing against naive LOCAL time would be out by the UTC offset).
                updatedDatetime = datetime.strptime(
                    resourceDict['updated_at'],
                    '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
                folderModifiedDatetime = datetime.fromtimestamp(
                    os.stat(unzippedFolderPath).st_mtime, tz=timezone.utc)
                alreadyDownloadedFlag = folderModifiedDatetime > updatedDatetime

            if alreadyDownloadedFlag:
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print(
                        "Skipping download because folder '{}' already exists."
                        .format(unzippedFolderPath))
            else:  # Download the zip file (containing all the USFM files, README.md, LICENSE.md, manifest.yaml, etc.)
                # TODO: Change to .tar.gz instead of zip
                zipURL = self.baseURL + '/archive/master.zip'  # '/archive/master.tar.gz'
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    print("Downloading entire repo from '{}'…".format(zipURL))
                try:
                    HTTPResponseObject = urllib.request.urlopen(zipURL)
                except urllib.error.URLError as err:
                    logging.critical("DCS URLError '{}' from {}".format(
                        err, zipURL))
                    return  # NOTE: USFMBible.__init__ has not been called at this point

                # The with statement ensures that the connection is closed, even if something fails
                with HTTPResponseObject:
                    contentType = HTTPResponseObject.info().get('content-type')
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        print("    contentType", repr(contentType))
                    if contentType != 'application/octet-stream':
                        raise ValueError(
                            f"Unexpected content type {contentType!r} from '{zipURL}'"
                        )
                    downloadedData = HTTPResponseObject.read()
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    print(
                        f"  Downloaded {len(downloadedData):,} bytes from '{zipURL}'"
                    )

                # Only create the folder once we actually have the data
                #   (so a failed read doesn't leave an empty folder behind)
                os.makedirs(unzippedFolderPath, exist_ok=True)
                # Bug in Python up to 3.7 makes this not work for large aligned Bibles (3+ MB)
                # myTempFile = tempfile.SpooledTemporaryFile()
                with tempfile.TemporaryFile() as myTempFile:  # Automatically deleted when closed
                    myTempFile.write(downloadedData)
                    with zipfile.ZipFile(myTempFile) as myzip:
                        # NOTE: Could be a security risk here (the archive contents are not inspected)
                        myzip.extractall(unzippedFolderPath)
            self.downloadedAllBooks = True

            # There's probably a folder inside this folder
            folders = os.listdir(unzippedFolderPath)
            assert len(
                folders
            ) == 1  # else maybe a previous download failed -- just manually delete the folder
            desiredFolderName = folders[0] + '/'
            USFMBible.__init__(self,
                               os.path.join(unzippedFolderPath,
                                            desiredFolderName),
                               givenName=resourceDict['name'])
        else:
            self.downloadedAllBooks = False
            # Starts empty -- presumably filled by other methods as individual books are fetched (TODO confirm)
            self.attemptedDownload = {}
            os.makedirs(unzippedFolderPath, exist_ok=True)
            USFMBible.__init__(self,
                               unzippedFolderPath,
                               givenName=resourceDict['name'])
        self.objectNameString = 'DCS USFM Bible object'
コード例 #2
0
    def __init__(self, parameterOne, resourcesObject=None):
        """
        Create the Door43 cataloged Bible object.

        parameterOne can be:
            a catalog dictionary entry (and second parameter must be None)
        or
            an index into the BibleList in the resourcesObject passed as the second parameter

        resourcesObject is only used when parameterOne is an index:
            its getBibleResourceDict() is called to fetch the dictionary entry.

        The dictionary entry must contain 'language', 'title' and 'identifier',
            either 'formats' or a single-entry 'projects' list containing 'formats',
            plus 'issued' if a previously downloaded folder exists.

        The zipped USFM is downloaded and unzipped (unless the local copy is
            newer than the entry's 'issued' timestamp) and then the USFMBible
            is initialised from the single folder found inside the unzipped folder.

        NOTE: If no zipped USFM format is listed, or if the download fails with
            a URLError, the error is logged and this function returns early,
            i.e., BEFORE USFMBible.__init__ has been called,
            so the object is left only partly initialised.

        Raises ValueError if the server answers with an unexpected content type.
        """
        # Imported locally because only the datetime class itself is known to be in scope here
        from datetime import timezone

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(
                f"Door43CatalogBible.__init__( {parameterOne}, {resourcesObject} )…"
            )

        if isinstance(parameterOne, dict):
            assert resourcesObject is None
            resourceDict = parameterOne
        else:
            assert isinstance(parameterOne, int)
            assert resourcesObject  # why ??? and isinstance( resourcesObject, Door43CatalogResources )
            resourceDict = resourcesObject.getBibleResourceDict(parameterOne)
        assert resourceDict and isinstance(resourceDict, dict)

        # The formats list is either directly in the entry or else inside its (single) project
        if 'formats' in resourceDict:
            formats = resourceDict['formats']
        else:
            assert len(resourceDict['projects']) == 1
            formats = resourceDict['projects'][0]['formats']
        # Only print after the list has been located
        #   (so the debug output can't fail with a KeyError when there's no top-level 'formats' entry)
        if debuggingThisModule: print('formats', formats)
        assert formats

        # Find the first format entry that's a zip file of USFM
        for formatDict in formats:
            formatString = formatDict['format']
            if 'application/zip;' in formatString and 'usfm' in formatString:
                size, zipURL = formatDict['size'], formatDict['url']
                break
        else:  # we never found one
            logging.critical(
                f"No zip URL found for '{resourceDict['language']}' '{resourceDict['title']}'"
            )
            return  # NOTE: USFMBible.__init__ has not been called at this point

        # See if files already exist and are current (so don't download again)
        alreadyDownloadedFlag = False
        unzippedFolderPath = os.path.join(
            BibleOrgSysGlobals.DOWNLOADED_RESOURCES_FOLDER, 'Door43Catalog/',
            f"{resourceDict['language']}_{resourceDict['title']}/")
        if os.path.isdir(unzippedFolderPath):
            # The '+00:00' suffix means that the timestamp is UTC,
            #   so both sides of the comparison are made timezone-aware UTC
            #   (comparing against naive LOCAL time would be out by the UTC offset).
            issuedDatetime = datetime.strptime(
                resourceDict['issued'],
                '%Y-%m-%dT%H:%M:%S+00:00').replace(tzinfo=timezone.utc)
            folderModifiedDatetime = datetime.fromtimestamp(
                os.stat(unzippedFolderPath).st_mtime, tz=timezone.utc)
            alreadyDownloadedFlag = folderModifiedDatetime > issuedDatetime

        if alreadyDownloadedFlag:
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Skipping download because folder '{}' already exists.".
                      format(unzippedFolderPath))
        else:  # Download the zip file (containing all the USFM files, LICENSE.md, manifest.yaml, etc.)
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Downloading {:,} bytes from '{}'…".format(size, zipURL))
            try:
                HTTPResponseObject = urllib.request.urlopen(zipURL)
            except urllib.error.URLError as err:
                logging.critical("Door43 URLError '{}' from {}".format(
                    err, zipURL))
                return  # NOTE: USFMBible.__init__ has not been called at this point

            # The with statement ensures that the connection is closed, even if something fails
            with HTTPResponseObject:
                contentType = HTTPResponseObject.info().get('content-type')
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print("    contentType", contentType)
                if contentType != 'application/zip':
                    raise ValueError(
                        f"Unexpected content type {contentType!r} from '{zipURL}'"
                    )
                downloadedData = HTTPResponseObject.read()

            # Only create the folder once we actually have the data
            #   (so a failed read doesn't leave an empty folder behind)
            os.makedirs(unzippedFolderPath, exist_ok=True)
            # Bug in Python up to 3.7 makes this not work for large aligned Bibles (3+ MB)
            # myTempFile = tempfile.SpooledTemporaryFile()
            with tempfile.TemporaryFile() as myTempFile:  # Automatically deleted when closed
                myTempFile.write(downloadedData)
                with zipfile.ZipFile(myTempFile) as myzip:
                    # NOTE: Could be a security risk here (the archive contents are not inspected)
                    myzip.extractall(unzippedFolderPath)

        # There's probably a folder inside this folder
        folders = os.listdir(unzippedFolderPath)
        assert len(
            folders
        ) == 1  # else maybe a previous download failed -- just manually delete the folder
        desiredFolderName = folders[0] + '/'

        USFMBible.__init__(self,
                           os.path.join(unzippedFolderPath, desiredFolderName),
                           givenName=resourceDict['title'],
                           givenAbbreviation=resourceDict['identifier'])
        self.objectNameString = 'Door43 USFM Bible object'