class LocalPath(fileprovider.FilePath):

    contentTypes = ourmimetypes.MimeTypes()

    # Override parent class property by an attribute
    mimeType = None

    def __init__(self, path):
        self._path = path
        self.mimeType = self.contentTypes.fromPath(path)

    def __str__(self):
        return "<%s '%s'>" % (type(self).__name__, self._path)

    def child(self, name):
        """
        Returns an instance of this class pointing at the named child
        of this path.

        @param name: the name of a child of the pointed directory
        @type  name: str

        @raises InsecureError: if the specified name compromise security
        """
        childpath = self._getChildPath(name)
        return type(self)(childpath)

    def open(self):
        # Concrete subclasses provide the actual implementation.
        raise NotImplementedError()


    ## Protected Methods ##

    def _getChildPath(self, name):
        """
        @param name: the name of a child of the pointed directory
        @type  name: str

        @return: the path of the child
        @rtype:  str

        @raises InsecureError: if the specified name compromise security
        """
        norm = os.path.normpath(name)
        if os.sep in norm:
            raise InsecureError("Child name '%s' contains one or more "
                                "directory separators" % (name, ))
        childpath = os.path.abspath(os.path.join(self._path, norm))
        # A plain prefix test is not enough: '/srv/www-evil' starts with
        # '/srv/www' without being inside it.  Accept only the base path
        # itself or paths strictly below it, separator-wise.
        base = self._path.rstrip(os.sep)
        if childpath != base and not childpath.startswith(base + os.sep):
            raise InsecureError("Path '%s' is not a child of '%s'"
                                % (childpath, self._path))
        return childpath
class CachedSource(resource_manager.DataSource):
    """
    Data source that read data directly from a localy cached file.
    """

    mimetypes = ourmimetypes.MimeTypes()

    def __init__(self, ident, url, cachedFile, stats):
        self.identifier = ident
        self.url = url
        self._file = cachedFile
        self.stats = stats
        # Metadata comes from the URL path and the cached file itself.
        self.mimeType = self.mimetypes.fromPath(url.path)
        fileStat = cachedFile.stat
        self.mtime = fileStat[stat.ST_MTIME]
        self.size = fileStat[stat.ST_SIZE]
        # Track the file position so read() can skip redundant seeks.
        self._current = cachedFile.tell()

    def produce(self, consumer, offset):
        # A producer for a cached file is not really convenient
        # because it's better used pulling than pushing.
        return None

    def read(self, offset, size):
        """
        Reads up to size bytes starting at offset from the cached file,
        updating the byte-read statistics and the remembered position.
        """
        if offset != self._current:
            self._file.seek(offset)
        data = self._file.read(size)
        got = len(data)
        self.stats.onBytesRead(0, got, 0)
        self._current = offset + got
        return data

    def close(self):
        """
        Notifies the statistics and releases the underlying cached file.
        """
        self.stats.onClosed()
        self._file.close()
        self._file = None
class CachingSession(BaseCachingSession, log.Loggable): """ Caches a stream locally in a temporary file. The already cached data can be read from the session. Can be canceled, meaning the session is not valid anymore. Can be aborted, meaning the session will stop caching locally but is still valid. The caching operation can be started at any moment, but the session have to receive the stream info before it can be used with a RemoteSource instance. It can recover request failures up to MAX_RESUME_COUNT times. """ logCategory = "caching-session" (PIPELINING, REQUESTING, BUFFERING, CACHING, CACHED, DETACHED, CLOSED, CANCELED, ABORTED, ERROR) = range(10) mimetypes = ourmimetypes.MimeTypes() def __init__(self, strategy, url, cache_stats, ifModifiedSince=None): self.strategy = strategy self.url = url self.identifier = strategy.cachemgr.getIdentifier(url.path) self.ifModifiedSince = ifModifiedSince self.cache_stats = cache_stats self._refcount = 0 self._state = self.PIPELINING self._request = None self.checkModified = False self._infoDefers = [] self._startedDefers = [] self._finishedDefers = [] self._errorValue = None self._file = None self._bytes = 0 self._correction = 0 self._resumes = MAX_RESUME_COUNT self.logName = common.log_id(self) # To be able to track the instance self.strategy._onNewSession(self) self.log("Caching session created for %s", url) def isActive(self): return (self._state < self.CLOSED) or (self._state == self.ABORTED) def getState(self): return self._state def cache(self): """ Starts caching the remote resource locally. 
""" if self._state != self.PIPELINING: return self._state = self.REQUESTING self.debug("Caching requested for %s", self.url) self.cache_stats.onCopyStarted() self._firstRetrieve() def waitInfo(self): if self._state < self.BUFFERING: d = defer.Deferred() self._infoDefers.append(d) return d if self._state <= self.CLOSED: return defer.succeed(self) return defer.fail(self._errorValue) def waitStarted(self): if self._state <= self.REQUESTING: d = defer.Deferred() self._startedDefers.append(d) return d if self._state <= self.CLOSED: return defer.succeed(self) return defer.fail(self._errorValue) def waitFinished(self): if self._state < self.DETACHED: d = defer.Deferred() self._finishedDefers.append(d) return d if self._state <= self.CLOSED: return defer.succeed(self) return defer.fail(self._errorValue) def read(self, offset, size): if self._state == self.CANCELED: raise fileprovider.FileOutOfDate("File out of date") if self._state == self.ABORTED: return None if self._state >= self.CLOSED: raise fileprovider.FileClosedError("Session Closed") if self._file is None: return None if min(self.size, offset + size) > self._bytes: return None self._file.seek(offset) return self._file.read(size) def cancel(self): """ After calling this method the session cannot be used anymore. 
""" if self._state < self.REQUESTING or self._state >= self.CACHED: return self.log("Canceling caching session for %s", self.url) self.strategy._onSessionCanceled(self) self.cache_stats.onCopyCancelled(self.size, self._bytes) self._close() error = fileprovider.FileOutOfDate("File out of date") self._fireError(error) if self._request: self.debug("Caching canceled for %s (%d/%d Bytes ~ %d %%)", self.url, self._bytes, self.size, self.size and int(self._bytes * 100 / self.size)) self._request.cancel() self._request = None else: self.debug("Caching canceled before starting to cache") self._state = self.CANCELED def abort(self): """ After calling this method the session will just stop caching and return None when trying to read. Used when pipelining is wanted. """ if self._state < self.REQUESTING or self._state >= self.CACHED: return self.log("Aborting caching session for %s", self.url) self.strategy._onSessionCanceled(self) self.cache_stats.onCopyCancelled(self.size, self._bytes) self._close() error = fileprovider.FileError("Caching aborted") self._fireError(error) if self._request: self.debug("Caching aborted for %s", self.url) self._request.cancel() self._request = None else: self.debug("Caching aborted before starting to cache") self._state = self.ABORTED def addref(self): self._refcount += 1 def delref(self): self._refcount -= 1 if self._refcount == 0: if self._state == self.DETACHED: # not referenced, so no we can close the file self.log("Detached session not referenced anymore") self._close() def isref(self): return self._refcount > 0 ### StreamConsumer ### def serverError(self, getter, code, message): self.warning("Session request error %s (%s) for %s using %s:%s", message, code, self.url, getter.host, getter.port) if code in (common.SERVER_DISCONNECTED, common.SERVER_TIMEOUT): if self._resumes > 0: self._resumes -= 1 if self._state > self.REQUESTING: # We already have request info offset = self._bytes size = self.size - self._bytes self.debug("Resuming retrieval 
from offset %d with " "size %d of %s (%d tries left)", offset, size, self.url, self._resumes) self._resumeRetrieve(offset, size) return else: # We don't have any info, e must retry from scratch self.debug("Resuming retrieval from start of %s " "(%d tries left)", self.url, self._resumes) self._firstRetrieve() return self.debug("Too much resuming intents, stopping " "after %d of %s bytes of %s", self._bytes, self.size, self.url) self._close() self._error(fileprovider.UnavailableError(message)) def conditionFail(self, getter, code, message): if code == common.STREAM_MODIFIED: # Modified file detected during recovery self.log("Modifications detected during recovery of %s", self.url) self.cancel() return self.log("Unexpected HTTP condition failed: %s", message) self._close() self._error(ConditionError(message, code=code)) def streamNotAvailable(self, getter, code, message): self.log("Stream to be cached is not available: %s", message) self._close() if code == common.STREAM_NOTFOUND: self._error(fileprovider.NotFoundError(message)) elif code == common.STREAM_FORBIDDEN: self._error(fileprovider.AccessError(message)) else: self._error(fileprovider.FileError(message)) def onInfo(self, getter, info): if self._state == self.BUFFERING: # We are resuming while waiting for a temporary file, # so we still don't want to accumulate data self._request.pause() return if self._state != self.REQUESTING: # Already canceled, or recovering from disconnection return if info.size != (info.length - self._bytes): self.log("Unexpected stream size: %s / %s bytes " "(Already got %s bytes)", info.size, info.length, self._bytes) self._close() msg = "Unexpected resource size: %d" % info.size self._error(fileprovider.FileError(msg)) return self._state = self.BUFFERING self.mimeType = self.mimetypes.fromPath(self.url.path) self.mtime = info.mtime self.size = info.size self.log("Caching session with type %s, size %s, mtime %s for %s", self.mimeType, self.size, self.mtime, self.url) self._file = 
StringIO() # To wait until we got the real one self.log("Requesting temporary file for %s", self.url) d = self.strategy.cachemgr.newTempFile(self.url.path, info.size, info.mtime) # But we don't want to accumulate data # but it is possible to receive a small amount of data # even after calling pause(), so we need buffering. self._request.pause() # We have got meta data, so callback self._fireInfo(self) self._fireStarted(self) self.debug("Start buffering %s", self.url) d.addCallback(self._gotTempFile) def _gotTempFile(self, tempFile): if self._state not in (self.BUFFERING, self.CACHED): # Already canceled if tempFile: tempFile.close() return if tempFile is None: self.warning("Temporary file creation failed, " "aborting caching of %s", self.url) self.abort() return self.log("Got temporary file for %s", self.url) self.debug("Start caching %s", self.url) data = self._file.getvalue() self._file = tempFile tempFile.write(data) if self._request is not None: # We still have a request, so we want more data of it self._request.resume() if self._state == self.CACHED: # Already got all the data self._real_complete() else: self._state = self.CACHING def onData(self, getter, data): assert self._state in (self.BUFFERING, self.CACHING), "Not caching" self._file.seek(self._bytes) size = len(data) try: self._file.write(data) except Exception, e: self.warning("Error writing in temporary file: %s", e) self.debug("Got %s / %s bytes, would be %s with %s more", self._bytes, self.size, self._bytes + size, size) self.abort() else:
def reloadMimeTypes():
    """
    Re-read the mime-type mapping and refresh the class-level caches.

    LocalPath, CachedSource and CachingSession each keep a MimeTypes
    instance created at class-definition time; refreshing only one of
    them would leave the others serving stale mappings.
    """
    newTypes = ourmimetypes.MimeTypes()
    LocalPath.contentTypes = newTypes
    CachedSource.mimetypes = newTypes
    CachingSession.mimetypes = newTypes