def _scanDirectories(self, result = None, walker = None):
    """Walk each directory looking for cached files.

    Each call examines at most one directory entry and then re-schedules
    itself with C{reactor.callLater(0, ...)} instead of looping, so the
    scan advances one entry per reactor iteration.

    @param result: the result of a DHT store request, not used (optional)
    @param walker: the walker to use to traverse the current directory
        (optional, defaults to creating a new walker from the first
        directory in the L{CacheManager.scanning} list)
    """
    # Need to start walking a new directory
    if walker is None:
        # Nothing left to scan, so stop without re-scheduling
        if not self.scanning:
            log.msg('cache directory scan complete')
            return

        log.msg('started scanning directory: %s' % self.scanning[0].path)
        walker = self.scanning[0].walk()

    try:
        # Get the next entry in the directory. The next() builtin is used
        # rather than the .next() method so that any iterator is accepted
        # (the method was renamed to __next__ in Python 3).
        entry = next(walker)
    except StopIteration:
        # No files left, go to the next directory
        log.msg('done scanning directory: %s' % self.scanning[0].path)
        self.scanning.pop(0)
        reactor.callLater(0, self._scanDirectories)
        return

    # If it's not a file ignore it
    if not entry.isfile():
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # If it's already properly in the DB, ignore it
    db_status = self.db.isUnchanged(entry)
    if db_status:
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # Don't hash files in the cache that are not in the DB
    if self.scanning[0] == self.cache_dir:
        if db_status is None:
            # None is treated as "not known to the DB": leave the file alone
            log.msg('ignoring unknown cache file: %s' % entry.path)
        else:
            # Any other false value is treated as "changed since stored"
            log.msg('removing changed cache file: %s' % entry.path)
            entry.remove()
        reactor.callLater(0, self._scanDirectories, None, walker)
        return

    # Otherwise hash it; the walker is handed to _doneHashing along with
    # the file (presumably so the scan can be resumed from there -- confirm
    # against _doneHashing).
    log.msg('start hash checking file: %s' % entry.path)
    hasher = HashObject()
    df = hasher.hashInThread(entry)
    df.addBoth(self._doneHashing, entry, walker)
def _save_complete(self, hash, url, destFile, destStream = None,
                   modtime = None, decFile = None):
    """Update the modification time and inform the main program.

    The file is stored in the DB when C{hash.verify()} returns a true value
    or C{None}; any other false value is treated as a hash mismatch and the
    download is discarded. When a decompressed copy exists it is hashed in
    a thread and then passed through this method again.

    @type hash: L{Hash.HashObject}
    @param hash: the hash object containing the expected hash for the file
    @param url: the URI of the actual mirror request
    @type destFile: C{twisted.python.FilePath}
    @param destFile: the file where the download was written to
    @type destStream: L{Streams.GrowingFileStream}
    @param destStream: the stream to notify that all data is available
    @type modtime: C{int}
    @param modtime: the modified time of the cached file (seconds since epoch)
        (optional, defaults to not setting the modification time of the file)
    @type decFile: C{twisted.python.FilePath}
    @param decFile: the file where the decompressed download was written to
        (optional, defaults to the file not having been compressed)
    """
    result = hash.verify()

    # A false result other than None means the hashes did not match
    if not (result or result is None):
        log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
        if destStream:
            destStream.allAvailable(remove = True)
        if decFile:
            decFile.remove()
        return

    if destStream:
        destStream.allAvailable()
    if modtime:
        os.utime(destFile.path, (modtime, modtime))

    # The dht flag passed to storeFile is only set for a verified match
    if result:
        log.msg('Hashes match: %s' % url)
        dht = True
    else:
        log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
        dht = False

    new_hash = self.db.storeFile(destFile, hash.digest(), dht,
                                 ''.join(hash.pieceDigests()))

    if self.manager:
        self.manager.new_cached_file(destFile, hash, new_hash, url)

    if decFile:
        # Hash the decompressed file and add it to the DB
        decHash = HashObject()

        # Strip from the URL as many trailing characters as the compressed
        # path is longer than the decompressed one. Only slice for a
        # positive difference: url[:-0] is the empty string, and a negative
        # difference would keep an arbitrary prefix of the URL.
        ext_len = len(destFile.path) - len(decFile.path)
        if ext_len > 0:
            dec_url = url[:-ext_len]
        else:
            dec_url = url

        df = decHash.hashInThread(decFile)
        df.addCallback(self._save_complete, dec_url, decFile, modtime = modtime)
        df.addErrback(self._save_error, dec_url, decFile)