def findItem(myClass, nameOrLocation, checksumType, checksumValue, displayName=None, additionalSourceFolders=None, progressReporter=True):
    '''Find an item locally, or download it

    The lookups are tried in this order and the first verified hit is returned:
      1. nameOrLocation as an absolute path
      2. nameOrLocation as a relative path (remote caches included)
      3. the items already recorded in myClass.verifiedFiles for this checksum
      4. the cache folders, searched by checksum alone
      5. the cache folders, searched by the base name of nameOrLocation (remote caches included)
      6. the cache folders, searched by displayName
      7. for http/https locations only: the names offered by the server, and finally
         a download into myClass.getCacheFolder()

    Parameters:
      myClass                 - the object providing findItemInCaches, verifiedFiles,
                                addItemToVerifiedFiles and getCacheFolder
      nameOrLocation          - string (name, path, file:// or http(s):// url) or None;
                                with None only the checksum and displayName lookups run
      checksumType            - string, the hash name handed to hashlib.new()
      checksumValue           - string, the expected hex digest
      displayName             - string or None, an alternative name to search for
      additionalSourceFolders - None, a folder path, or an iterable of folder paths
      progressReporter        - True to create a displayTools.statusHandler, False/None for
                                no reporting, or an object offering update() and finishLine()

    Returns the path of the verified item, or None when nothing was found and
    nameOrLocation is not an http/https url.

    Raises:
      ValueError            - when an argument fails validation
      Exception             - when the remote url can not be opened
      FileNotFoundException - when the downloaded file does not match the checksum
      Errors raised by findItemInCaches (for example for a missing absolute path)
      are passed through unchanged.
    '''

    import sys # only used to read the active exception without version specific syntax

    # ---- validate input

    # nameOrLocation, note: the value is passed through str() so a bad value produces the ValueError and not a TypeError
    if nameOrLocation is not None and not hasattr(nameOrLocation, 'capitalize'):
        raise ValueError('findItem requires a string as a nameOrLocation, but got: ' + str(nameOrLocation))
    if nameOrLocation is not None and nameOrLocation.startswith('file://'):
        nameOrLocation = nameOrLocation[len('file://'):]

    # checksumType
    if not hasattr(checksumType, 'capitalize'):
        raise ValueError('findItem requires a string as a checksumType, but got: ' + str(checksumType))

    # checksumValue
    if not hasattr(checksumValue, 'capitalize'):
        raise ValueError('findItem requires a string as a checksumValue, but got: ' + str(checksumValue))

    # displayName
    if displayName is not None and not hasattr(displayName, 'capitalize'):
        raise ValueError('findItem requires a string or None as a displayName, but got: ' + str(displayName))

    # additionalSourceFolders: None and a single existing folder need no further checks
    isSingleFolder = hasattr(additionalSourceFolders, 'capitalize') and os.path.isdir(additionalSourceFolders)
    if additionalSourceFolders is not None and not isSingleFolder:
        if not hasattr(additionalSourceFolders, '__iter__'):
            raise ValueError('Unable to understand the additionalSourceFolders given: ' + str(additionalSourceFolders))

        # validate that these are all folders
        for thisFolder in additionalSourceFolders:
            if not os.path.isdir(thisFolder):
                raise ValueError('The folder given to findItem as an additionalSourceFolders either did not exist or was not a folder: ' + str(thisFolder))

    # the key used for myClass.verifiedFiles and for the name of a downloaded file
    checksumString = '%s-%s' % (checksumType, checksumValue)

    # progressReporter
    if progressReporter is True:
        # nameOrLocation may legitimately be None, so fall back to something printable
        searchLabel = nameOrLocation
        if searchLabel is None:
            searchLabel = displayName
        if searchLabel is None:
            searchLabel = checksumString
        progressReporter = displayTools.statusHandler(taskMessage='Searching for ' + searchLabel)
    elif progressReporter is False:
        progressReporter = None

    # ---- start the timer for reporting
    startTime = time.time()

    def reportFound(howFound, reportCompleted):
        '''Print the closing status line, unless findItemInCaches reported completion itself'''
        if progressReporter is not None and reportCompleted is False:
            progressReporter.update(statusMessage=' found %s and verified in %s' % (howFound, displayTools.secondsToReadableTime(time.time() - startTime)))
            progressReporter.finishLine()

    # ---- look locally

    # -- absolute path, note: if an absolute path is not found we will error out
    if nameOrLocation is not None and os.path.isabs(nameOrLocation):
        if progressReporter is not None:
            progressReporter.update(statusMessage=' looking at an absolute location')

        # the path itself has to be handed over: findItemInCaches raises when nothing
        # (or the wrong thing) is at an absolute path, passing None would skip that check
        resultPath, reportCompleted = myClass.findItemInCaches(nameOrLocation, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter)
        if resultPath is not None:
            reportFound('at an absolute location', reportCompleted)
            return resultPath

    # -- try relative path, include remote caches
    parsedNameOrLocation = urlparse.urlparse(nameOrLocation or '')
    if nameOrLocation is not None and parsedNameOrLocation.scheme == '' and nameOrLocation.count(os.sep) > 0:
        if progressReporter is not None:
            progressReporter.update(statusMessage=' looking at relative locations')

        resultPath, reportCompleted = myClass.findItemInCaches(nameOrLocation, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter, includeRemoteCaches=True)
        if resultPath is not None:
            reportFound('at a relative location', reportCompleted)
            return resultPath

    # -- based on checksum

    # first see if we already found this item
    if checksumString in myClass.verifiedFiles:
        if progressReporter is not None:
            progressReporter.update(statusMessage=' found previously')
            progressReporter.finishLine()
        return myClass.verifiedFiles[checksumString]

    # look through the caches
    if progressReporter is not None:
        progressReporter.update(statusMessage=' looking based on checksum')

    resultPath, reportCompleted = myClass.findItemInCaches(None, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter)
    if resultPath is not None:
        myClass.addItemToVerifiedFiles(checksumString, resultPath)
        reportFound('based on checksum', reportCompleted)
        return resultPath

    # -- based on name guessed from nameOrLocation, include remote caches
    locallyGuessedName = None
    if nameOrLocation is not None:
        if progressReporter is not None:
            # nothing has been verified at this point, so no timing is reported here
            progressReporter.update(statusMessage=' looking based on guessed name')

        if parsedNameOrLocation.scheme in ['http', 'https']:
            locallyGuessedName = os.path.basename(parsedNameOrLocation.path)
        else:
            locallyGuessedName = os.path.basename(nameOrLocation)

        resultPath, reportCompleted = myClass.findItemInCaches(locallyGuessedName, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter, includeRemoteCaches=True)
        if resultPath is not None:
            myClass.addItemToVerifiedFiles(checksumString, resultPath)
            reportFound('based on guessed name', reportCompleted)
            return resultPath

    # -- based on display name
    if displayName is not None:
        if progressReporter is not None:
            progressReporter.update(statusMessage=' looking based on display name')

        resultPath, reportCompleted = myClass.findItemInCaches(displayName, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter)
        if resultPath is not None:
            myClass.addItemToVerifiedFiles(checksumString, resultPath)
            reportFound('based on display name', reportCompleted)
            return resultPath

    # ---- look remotely over http/https
    if parsedNameOrLocation.scheme not in ['http', 'https']:
        # nothing was found locally and there is nowhere to download from
        return None

    # -- open a connection and get information to guess the name
    try:
        readFile = urllib2.urlopen(nameOrLocation)
    except IOError:
        error = sys.exc_info()[1]
        if hasattr(error, 'reason'):
            raise Exception('Unable to connect to remote url: %s got error: %s' % (nameOrLocation, error.reason))
        elif hasattr(error, 'code'):
            raise Exception('Got status code: %s while trying to connect to remote url: %s' % (str(error.code), nameOrLocation))
        # an IOError that can not be described any better: pass it on rather than continue without a connection
        raise

    # from here on the connection is closed no matter how we leave
    try:
        # try reading out the content-disposition header
        # NOTE(review): the whole header value is used as the name, a value such as
        # 'attachment; filename=...' is not parsed - confirm against the servers in use
        httpHeader = readFile.info()
        remoteGuessedName = locallyGuessedName
        if httpHeader.has_key("content-disposition"):
            remoteGuessedName = httpHeader.getheader("content-disposition").strip()

        # compare by value: an equal name has already been searched for above
        if remoteGuessedName != locallyGuessedName:
            if progressReporter is not None:
                progressReporter.update(statusMessage=' looking based on name from content-disposition')

            resultPath, reportCompleted = myClass.findItemInCaches(remoteGuessedName, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter, includeRemoteCaches=True)
            if resultPath is not None:
                reportFound('based on name from content-disposition', reportCompleted)
                return resultPath

        # try the name in the final URL (the url after any redirects, as reported by geturl)
        secondRemoteGuessedName = os.path.basename(urllib.unquote(urlparse.urlparse(readFile.geturl()).path))
        if secondRemoteGuessedName not in [locallyGuessedName, remoteGuessedName]:
            if progressReporter is not None:
                progressReporter.update(statusMessage=' looking based on name in final URL')

            resultPath, reportCompleted = myClass.findItemInCaches(secondRemoteGuessedName, checksumType, checksumValue, displayName, additionalSourceFolders, progressReporter, includeRemoteCaches=True)
            if resultPath is not None:
                reportFound('based on name in final URL', reportCompleted)
                return resultPath

        # -- download file

        # try to get the expected file length, a missing or malformed header just means "unknown"
        expectedLength = None
        if httpHeader.has_key("content-length"):
            try:
                expectedLength = int(httpHeader.getheader("content-length"))
            except (TypeError, ValueError):
                pass

        if progressReporter is not None:
            if expectedLength is None:
                progressReporter.update(statusMessage=' downloading ')
            else:
                progressReporter.update(statusMessage=' downloading %s ' % displayTools.bytesToRedableSize(expectedLength))

        # the file is stored as "<name> <type>-<value><extension>" in the cache folder
        hashGenerator = hashlib.new(checksumType)
        baseName, extension = os.path.splitext(secondRemoteGuessedName)
        downloadTargetPath = os.path.join(myClass.getCacheFolder(), baseName + " " + checksumString + extension)
        processedBytes, processSeconds = checksum.checksumFileObject(hashGenerator, readFile, secondRemoteGuessedName, expectedLength, copyToPath=downloadTargetPath, progressReporter=progressReporter)

        if hashGenerator.hexdigest() != checksumValue:
            # the mismatching download is deliberately left in the cache folder
            raise FileNotFoundException("Downloaded file did not match checksum: %s (Find this: %s and replace it with this: %s)" % (nameOrLocation, checksumValue, hashGenerator.hexdigest()))

        if progressReporter is not None:
            # guard the rate against a transfer that registered as taking no time at all
            bytesPerSecond = processedBytes
            if processSeconds:
                bytesPerSecond = processedBytes / processSeconds
            progressReporter.update(statusMessage=' downloaded and verified %s in %s (%s/sec)' % (displayTools.bytesToRedableSize(processedBytes), displayTools.secondsToReadableTime(time.time() - startTime), displayTools.bytesToRedableSize(bytesPerSecond)))
            progressReporter.finishLine()

        myClass.addItemToVerifiedFiles(checksumString, downloadTargetPath)
        return downloadTargetPath

    finally:
        readFile.close()
def findItemInCaches(myClass, nameOrLocation, checksumType, checksumValue, displayName=None, additionalSourceFolders=None, progressReporter=True, includeRemoteCaches=False):
    '''Look for an item with the given checksum at a path or in the source/cache folders

    A candidate is only returned after its actual checksum has been compared with
    checksumValue. Candidates are, in order:
      - nameOrLocation as an absolute path, or as a path relative to the working directory
      - for each folder from myClass.getSourceFolders() plus additionalSourceFolders:
          - http/https folders (only when includeRemoteCaches is True and a name is given):
            the name, with and without the checksum in it, is downloaded into
            myClass.getCacheFolder()
          - local folders: nameOrLocation relative to the folder, then every file and folder
            below it whose name carries the checksum (per myClass.fileNameChecksumRegex)
            or matches nameOrLocation with or without extension

    Parameters:
      myClass                 - the object providing getSourceFolders, getCacheFolder,
                                fileNameChecksumRegex and addItemToVerifiedFiles
      nameOrLocation          - string (name, path or file:// url) or None to search by checksum only
      checksumType            - string, the hash name
      checksumValue           - string, the expected hex digest
      displayName             - string or None, only validated here
      additionalSourceFolders - None, a folder path, or an iterable of folder paths
      progressReporter        - True to create a displayTools.statusHandler, False/None for
                                no reporting, or an object offering update() and finishLine()
      includeRemoteCaches     - True to also try http/https source folders

    Returns a tuple (path, reportCompleted): reportCompleted is True when the item was
    downloaded from a remote cache (that branch finishes the status line itself) and False
    for local hits. When nothing is found (None, None) is returned.

    Raises:
      ValueError            - when an argument fails validation
      FileNotFoundException - when nameOrLocation is an absolute path at which nothing
                              exists, or at which the item does not match the checksum
    '''

    # ---- validate input

    # nameOrLocation, note: the value is passed through str() so a bad value produces the ValueError and not a TypeError
    if nameOrLocation is not None and not hasattr(nameOrLocation, 'capitalize'):
        raise ValueError('findItem requires a string or none as a nameOrLocation, but got: ' + str(nameOrLocation))
    if nameOrLocation is not None and nameOrLocation.startswith('file://'):
        nameOrLocation = nameOrLocation[len('file://'):]
    if nameOrLocation is not None and urlparse.urlparse(nameOrLocation).scheme != '':
        raise ValueError('findItemInCaches only works on file paths or names, got: ' + str(nameOrLocation))

    # checksumType
    if not hasattr(checksumType, 'capitalize'):
        raise ValueError('findItem requires a string as a checksumType, but got: ' + str(checksumType))

    # checksumValue
    if not hasattr(checksumValue, 'capitalize'):
        raise ValueError('findItem requires a string as a checksumValue, but got: ' + str(checksumValue))

    # displayName
    if displayName is not None and not hasattr(displayName, 'capitalize'):
        raise ValueError('findItem requires a string or None as a displayName, but got: ' + str(displayName))

    # additionalSourceFolders
    # work on a copy, so the folders appended below can never end up in the list owned by myClass
    foldersToSearch = list(myClass.getSourceFolders())
    if additionalSourceFolders is None:
        pass # nothing to do
    elif hasattr(additionalSourceFolders, 'capitalize') and os.path.isdir(additionalSourceFolders):
        foldersToSearch.append(pathHelpers.normalizePath(additionalSourceFolders, followSymlink=True))
    elif hasattr(additionalSourceFolders, '__iter__'):
        # validate that these are all folders
        for thisFolder in additionalSourceFolders:
            if not os.path.isdir(thisFolder):
                raise ValueError('The folder given to findItemInCaches as an additionalSourceFolders either did not exist or was not a folder: ' + str(thisFolder))
            foldersToSearch.append(pathHelpers.normalizePath(thisFolder, followSymlink=True))
    else:
        raise ValueError('Unable to understand the additionalSourceFolders given: ' + str(additionalSourceFolders))

    checksumString = '%s-%s' % (checksumType, checksumValue)

    # progressReporter
    if progressReporter is True:
        # nameOrLocation is None for checksum-only searches, so fall back to the checksum
        searchLabel = nameOrLocation
        if searchLabel is None:
            searchLabel = checksumString
        progressReporter = displayTools.statusHandler(statusMessage='Searching cache folders for ' + searchLabel)
    elif progressReporter is False:
        progressReporter = None

    # ---- search for the items

    # absolute paths: either the item is there and matches, or we error out
    if nameOrLocation is not None and os.path.isabs(nameOrLocation):
        if not os.path.exists(nameOrLocation):
            raise FileNotFoundException('No file/folder existed at the absolute path: ' + nameOrLocation)
        if checksumValue != checksum.checksum(nameOrLocation, checksumType=checksumType, progressReporter=progressReporter)['checksum']:
            raise FileNotFoundException('The item at the path given does not match the checksum given: ' + nameOrLocation)
        return nameOrLocation, False

    # relative path
    if nameOrLocation is not None and os.path.exists(nameOrLocation):
        if checksumValue == checksum.checksum(nameOrLocation, checksumType=checksumType, progressReporter=progressReporter)['checksum']:
            return pathHelpers.normalizePath(nameOrLocation, followSymlink=True), False

    # cache folders
    for thisCacheFolder in foldersToSearch:

        # without a name there is nothing to ask a remote cache for, so the folder is only parsed
        # when there is one; a remote folder then lands in the local branch where os.walk has nothing to offer
        parsedLocation = urlparse.urlparse('')
        if nameOrLocation is not None:
            parsedLocation = urlparse.urlparse(thisCacheFolder)

        if parsedLocation.scheme in ['http', 'https']:
            if includeRemoteCaches is not True:
                continue

            # -- try different paths on the server, in a fixed order and without duplicates
            (scheme, netloc, path, params, query, fragment) = parsedLocation

            baseName, extension = os.path.splitext(nameOrLocation)
            nameWithChecksum = baseName + " " + checksumString + extension

            urlsToTry = []
            for candidateName in [nameOrLocation, urllib.quote(nameOrLocation), nameWithChecksum, urllib.quote(nameWithChecksum)]:
                candidateURL = urlparse.urlunparse((scheme, netloc, os.path.join(path, candidateName), params, query, fragment))
                if candidateURL not in urlsToTry:
                    urlsToTry.append(candidateURL)

            for thisURL in urlsToTry:
                try:
                    readFile = urllib2.urlopen(thisURL)
                except IOError:
                    continue # not available under this name, try the next one

                # from here on the connection is closed no matter how we leave
                try:
                    # the local copy always carries the checksum in its name
                    remoteGuessedName = os.path.basename(urllib.unquote(urlparse.urlparse(thisURL).path))
                    targetFileName = remoteGuessedName
                    if checksumString not in targetFileName:
                        remoteBaseName, remoteExtension = os.path.splitext(remoteGuessedName)
                        targetFileName = remoteBaseName + " " + checksumString + remoteExtension

                    # try to get the expected file length, a missing or malformed header just means "unknown"
                    httpHeader = readFile.info()
                    expectedLength = None
                    if httpHeader.has_key("content-length"):
                        try:
                            expectedLength = int(httpHeader.getheader("content-length"))
                        except (TypeError, ValueError):
                            pass

                    if progressReporter is not None:
                        if expectedLength is None:
                            progressReporter.update(statusMessage=' downloading from local web cache ')
                        else:
                            progressReporter.update(statusMessage=' downloading %s from local web cache ' % displayTools.bytesToRedableSize(expectedLength))

                    # download file
                    hashGenerator = hashlib.new(checksumType)
                    startTime = time.time()
                    targetFilePath = os.path.join(myClass.getCacheFolder(), targetFileName)
                    processedBytes, processSeconds = checksum.checksumFileObject(hashGenerator, readFile, remoteGuessedName, expectedLength, copyToPath=targetFilePath, progressReporter=progressReporter)

                    if hashGenerator.hexdigest() == checksumValue:
                        if progressReporter is not None:
                            # guard the rate against a transfer that registered as taking no time at all
                            bytesPerSecond = processedBytes
                            if processSeconds:
                                bytesPerSecond = processedBytes / processSeconds
                            progressReporter.update(statusMessage=' downloaded from local web cache and verified %s in %s (%s/sec)' % (displayTools.bytesToRedableSize(processedBytes), displayTools.secondsToReadableTime(time.time() - startTime), displayTools.bytesToRedableSize(bytesPerSecond)))
                            progressReporter.finishLine()
                        myClass.addItemToVerifiedFiles(checksumString, targetFilePath)
                        return targetFilePath, True

                    # not the item we are looking for, throw the download away and try the next url
                    os.unlink(targetFilePath)
                finally:
                    readFile.close()

        elif parsedLocation.scheme == '':

            # relative paths from the source folders
            if nameOrLocation is not None and nameOrLocation.count(os.sep) > 0:
                relativeCandidate = os.path.join(thisCacheFolder, nameOrLocation)
                if os.path.exists(relativeCandidate):
                    if checksumValue == checksum.checksum(relativeCandidate, checksumType=checksumType, progressReporter=progressReporter)['checksum']:
                        return pathHelpers.normalizePath(relativeCandidate, followSymlink=True), False

            # walk down through the whole set
            for currentFolder, dirs, files in os.walk(thisCacheFolder, topdown=True):

                # check each file and folder to see if it is what we are looking for
                # note: (files + dirs) is a new list, so removing from dirs below does not disturb this loop
                for thisItemName in (files + dirs):
                    thisItemPath = os.path.join(currentFolder, thisItemName)

                    # checksum in name
                    checksumInName = False
                    fileNameSearchResults = myClass.fileNameChecksumRegex.search(thisItemName)
                    if fileNameSearchResults is not None:
                        nameChecksumType = fileNameSearchResults.group('checksumType')
                        nameChecksumValue = fileNameSearchResults.group('checksumValue')
                        checksumInName = nameChecksumType is not None and nameChecksumType.lower() == checksumType.lower() and nameChecksumValue is not None and nameChecksumValue == checksumValue

                    # file name, with or without the extension on either side
                    nameMatches = False
                    if nameOrLocation is not None:
                        itemNames = [thisItemName, os.path.splitext(thisItemName)[0]]
                        nameMatches = nameOrLocation in itemNames or os.path.splitext(nameOrLocation)[0] in itemNames

                    # verify a promising item once, no matter how many of the tests it passed
                    if checksumInName or nameMatches:
                        if checksumValue == checksum.checksum(thisItemPath, checksumType=checksumType, progressReporter=progressReporter)['checksum']:
                            return thisItemPath, False

                    # don't descend into folders that look like bundles or sparse dmg's
                    # note: os.listdir gives no ordering guarantee, so the listing is sorted before comparing
                    if os.path.isdir(thisItemPath) and thisItemName in dirs:
                        if sorted(os.listdir(thisItemPath)) in (["Contents"], ["Info.bckup", "Info.plist", "bands", "token"]):
                            dirs.remove(thisItemName)

    # nothing found: callers unpack two values, so always hand back a pair
    return None, None
def checksum(location, tempFolderPrefix="InstaDMGtemp", checksumType="sha1", displayName=None, outputFolder=None, checksumInFileName=True, chunkSize=None, progressReporter=True): '''Return the checksum of a given file or folder''' startReportTime = time.time() # validate input if location is None: raise Exception('Checksum called with a empty file location') if checksumType is None: raise Exception('Checksum called with a empty checksum type') if outputFolder is not None and not os.path.isdir(outputFolder): raise Exception('The output folder given does not exist, or is not a folder: ' + outputFolder) # make sure that the location is a string location = str(location) # confirm that hashlib supports the hash type: try: hashlib.new(checksumType) except ValueError: raise Exception("Hash type: %s is not supported by hashlib" % checksumType) # if a local copy is made, this will house the location localCopyPath = None if outputFolder is not None: # make sure we have an absolute path to it outputFolder = pathHelpers.normalizePath(outputFolder, followSymlink=True) # warm up the checksummer hashGenerator = hashlib.new(checksumType) # get rid of file:// urls if location.startswith('file://'): location = location[len('file://'):] locationURL = urlparse.urlparse(location) if displayName is None: if locationURL.scheme in ['http', 'https']: displayName = os.path.basename(locationURL.path) else: displayName = os.path.basename(location) # get the progress reporter ready if progressReporter is True: # create a statusHandler to handle this if locationURL.scheme in ['http', 'https']: progressReporter = statusHandler(taskMessage="Downloading %s: " % displayName) else: progressReporter = statusHandler(taskMessage="Checksumming %s: " % displayName) elif progressReporter in [False, None]: progressReporter = None # need to be consistent elif not isinstance(progressReporter, statusHandler): raise Exception('Unable to understand what the progressReporter is: ' + str(progressReporter)) if 
locationURL.scheme is '': # a local path, check if it is a folder # make sure we have the canonical location location = pathHelpers.normalizePath(location, followSymlink=True) fileName = os.path.basename(location) if chunkSize is None: chunkSize = 1*1024*1024 # 1 MiB chunks for local files if not os.path.exists(location): raise Exception('Checksum called with a file location that does not exist: %s' % location) elif os.path.isdir(location): if outputFolder is not None: # validate outputFolder if there is one if os.path.samefile(location, outputFolder): raise ValueError('The output folder (%s) can not be the source item (%s)' % (outputFolder, location)) if location.startswith(outputFolder + "/"): raise ValueError('The output folder (%s) can not be inside the source item (%s)' % (outputFolder, location)) if os.path.samefile(os.path.dirname(location), outputFolder): raise ValueError('The output folder (%s) can not the the same as the source folder (%s)' % (outputFolder, os.path.dirname(location))) # create a temporary file until we get the checksum localCopyPath = tempfile.mkdtemp(prefix='checksumTempFolder.', dir=outputFolder) # register it with the tempFolderManager class so it will get cleaned up if something goes wrong tempFolderManager.addManagedItem(localCopyPath) if progressReporter is not None: progressReporter.update(statusMessage="building file list ", progressTemplate="%(value)i items", value=0) # get a quick count itemCount = 0 for thisFolder, subFolders, subFiles in os.walk(location): for thisFile in subFiles: thisFilePath = os.path.join(thisFolder, thisFile) # note: we skip anything that is not a link or a file (ie: /dev) if os.path.islink(thisFilePath) or os.path.isfile(thisFilePath): itemCount += 1 for thisFolder in subFolders: itemCount += 1 if progressReporter is not None: progressReporter.update(value=itemCount) # change the status message if progressReporter is not None: progressReporter.update(statusMessage=" checksumming item ", 
progressTemplate="%(value)i of %(expectedLength)i (%(progressPercentage)i%%)", expectedLength=itemCount, value=0) # process the items processedCount = 0 for processFolder, subFolders, subFiles in os.walk(location): for thisFile in subFiles: thisFilePath = os.path.join(processFolder, thisFile) relativeFilePath = os.path.join(processFolder.replace(location, '', 1), thisFile) if os.path.isabs(relativeFilePath): relativeFilePath = relativeFilePath[1:] if os.path.islink(thisFilePath): if localCopyPath is not None: os.symlink(os.readlink(thisFilePath), os.path.join(localCopyPath, relativeFilePath)) hashGenerator.update("softlink %s to %s" % (os.readlink(thisFilePath), relativeFilePath)) elif os.path.isfile(thisFilePath): readFile = open(thisFilePath) if readFile == None: raise Exception("Unable to open file for checksumming: " + thisFilePath) targetLength = os.stat(thisFilePath)[stat.ST_SIZE] writeTarget = None if localCopyPath is not None: writeTarget = os.path.join(localCopyPath, relativeFilePath) # add the path to the checksum hashGenerator.update("file " + relativeFilePath) checksumFileObject(hashGenerator, readFile, thisFile, targetLength, chunkSize, copyToPath=writeTarget) readFile.close() else: continue # skip anything that is not a link or a file (ie: /dev) processedCount += 1 if progressReporter is not None: progressReporter.update(value=processedCount) for thisFolder in subFolders: thisFolderPath = os.path.join(processFolder, thisFolder) relativeFolderPath = os.path.join(processFolder.replace(location, '', 1), thisFolder) if os.path.isabs(relativeFolderPath): relativeFolderPath = relativeFolderPath[1:] if os.path.islink(thisFolderPath): if localCopyPath is not None: os.symlink(os.readlink(thisFolderPath), os.path.join(localCopyPath, relativeFolderPath)) hashGenerator.update("softlink %s to %s" % (os.readlink(thisFolderPath), relativeFolderPath)) else: if localCopyPath != None: os.mkdir( os.path.join(localCopyPath, relativeFolderPath) ) # add this to the hash 
hashGenerator.update("folder %s" % relativeFolderPath) processedCount += 1 if progressReporter is not None: progressReporter.update(value=processedCount) if progressReporter is not None: progressReporter.update(statusMessage='checksummed %i items in %s' % (processedCount, secondsToReadableTime(time.time() - startReportTime))) if localCopyPath is not None: # check if there is already something there targetOutputPath = os.path.join(outputFolder, fileName) if os.path.exists(targetOutputPath): if os.path.islink(targetOutputPath) or os.path.isfile(targetOutputPath): os.unlink(targetOutputPath) else: shutil.rmtree(targetOutputPath) # ToDo: handle errors # move the folder into place os.rename(localCopyPath, targetOutputPath) # unregister it from tempFolderManager tempFolderManager.removeManagedItem(localCopyPath) # change the localCopyPath to reflect the new location localCopyPath = os.path.basename(targetOutputPath) elif os.path.isfile(location): fileName = os.path.basename(location) readFile = open(location) if readFile == None: raise Exception("Unable to open file for checksumming: %s" % location) targetLength = os.stat(location)[stat.ST_SIZE] if outputFolder is not None: # create a temporary file until we get the checksum localCopyFile, localCopyPath = tempfile.mkstemp(prefix='checksumTempFile.', dir=outputFolder) os.close(localCopyFile) # register it with the tempFolderManager class so it will get cleaned up if something goes wrong tempFolderManager.addManagedItem(localCopyPath) if progressReporter is not None: progressReporter.update(statusMessage=" checksumming: ", progressTemplate='%(progressPercentage)i%% (%(recentRateInBytes)s)', expectedLength=targetLength, value=0) processedBytes, processSeconds = checksumFileObject(hashGenerator, readFile, os.path.basename(location), targetLength, chunkSize=chunkSize, copyToPath=localCopyPath, progressReporter=progressReporter) if progressReporter is not None: progressReporter.update(statusMessage=' checksummed (%s) in %s 
(%s/sec)' % (bytesToRedableSize(processedBytes), secondsToReadableTime(processSeconds), bytesToRedableSize(processedBytes/processSeconds))) readFile.close() # if we are keeping a local copy, move it into place if localCopyPath is not None: # change the file name to the real one, including the checksum if not suppressed realFilePath = None if checksumInFileName is True: realFilePath = os.path.join(outputFolder, os.path.splitext(fileName)[0] + " " + checksumType + "-" + hashGenerator.hexdigest() + os.path.splitext(fileName)[1]) else: realFilePath = os.path.join(outputFolder, fileName) # try to move the item to the proper name os.rename(localCopyPath, realFilePath) # ToDo: proper error handling for all of the bad things that can happen here # unregister it from tempFolderManager tempFolderManager.removeManagedItem(localCopyPath) # change the localCopyPath to reflect the new location, and that it will now be pulled from the cache localCopyPath = os.path.basename(realFilePath) else: raise Exception('Checksum called on a location that is neither a file or folder: %s' % location) elif locationURL.scheme in ['http', 'https']: if chunkSize is None: chunkSize = 1024*100 # 100KiB for urls try: readFile = urllib2.urlopen(location) except IOError, error: if hasattr(error, 'reason'): raise Exception('Unable to connect to remote url: %s got error: %s' % (location, error.reason)) elif hasattr(error, 'code'): raise Exception('Got status code: %s while trying to connect to remote url: %s' % (str(error.code), location)) if readFile == None: raise Exception("Unable to open file for checksumming: %s" % location) # default the filename to the last bit of path of the url fileName = os.path.basename( urllib.unquote(urlparse.urlparse(readFile.geturl()).path) ) if fileName in [None, '']: fileName = 'No_filename_provided' targetLength = None # grab the name of the file and its length from the http headers if avalible httpHeader = readFile.info() if httpHeader.has_key("content-length"): try: 
targetLength = int(httpHeader.getheader("content-length")) except: pass # if httpHeader.has_key("content-disposition"): fileName = httpHeader.getheader("content-disposition").strip() if outputFolder is not None: # create a temporary file until we get the checksum localCopyFile, localCopyPath = tempfile.mkstemp(prefix='checksumTempFile.', dir=outputFolder) os.close(localCopyFile) # register it with the tempFolderManager class so it will get cleaned up if something goes wrong tempFolderManager.addManagedItem(localCopyPath) if progressReporter is not None: if targetLength is not None: progressReporter.update(statusMessage="downloading: ", progressTemplate='%(progressPercentage)i%% (%(recentRateInBytes)s)', expectedLength=targetLength, value=0) else: progressReporter.update(statusMessage="downloading: ", progressTemplate='%(valueInBytes)s (%(recentRateInBytes)s)', value=0) processedBytes, processSeconds = checksumFileObject(hashGenerator, readFile, fileName, targetLength, copyToPath=localCopyPath, chunkSize=chunkSize, progressReporter=progressReporter) if progressReporter is not None: progressReporter.update(statusMessage=" downloaded %s (%s) in %s (%s/sec)" % (fileName, bytesToRedableSize(processedBytes), secondsToReadableTime(processSeconds), bytesToRedableSize(processedBytes/processSeconds))) if localCopyPath is not None: # change the file name to the real one, including the checksum if not suppressed realFilePath = None if checksumInFileName is True: realFilePath = os.path.join(outputFolder, os.path.splitext(fileName)[0] + " " + checksumType + "-" + hashGenerator.hexdigest() + os.path.splitext(fileName)[1]) else: realFilePath = os.path.join(outputFolder, fileName) # try to move the item to the proper name os.rename(localCopyPath, realFilePath) # ToDo: proper error handling for all of the bad things that can happen here # unregister it from tempFolderManager tempFolderManager.removeManagedItem(localCopyPath) # change the localCopyPath to reflect the new location 
localCopyPath = realFilePath readFile.close()