def __init__(self, url, fileOrName, method="GET", postdata=None, cookies=None, headers=None, agent=None, timeout=None):
    """Download *url* into *fileOrName* while collecting timing statistics.

    The counters in ``self.value`` are filled in by the buildProtocol /
    pageStart / pagePart / pageEnd overrides as the transfer progresses.
    """
    # XXX re-add support for cookies and timeout to HTTPDownloader call
    HTTPDownloader.__init__(self, url, fileOrName, method=method, postdata=postdata, headers=headers, agent=agent)
    self.value = {
        "startTime": time.time(),
        "timeToConnect": 0,
        "timeToFirstByte": 0,
        "elapsedTime": 0,
        "bytesTransferred": 0,
    }
    # Fix: `cookies={}` was a shared mutable default — every instance that
    # mutated its cookies dict would leak into all other instances.  Use a
    # None sentinel and allocate a fresh dict per instance instead.
    self.cookies = {} if cookies is None else cookies
    self.timeout = timeout
def pagePart(self, data):
    """Hand the received chunk to the parent, then report progress."""
    HTTPDownloader.pagePart(self, data)
    # Progress is only meaningful on a plain 200 response with callbacks set.
    if self.writeProgress and self.status == '200':
        self.currentlength += len(data)
        for notify in filter(None, self.writeProgress):
            notify(self.currentlength, self.totallength)
def gotHeaders(self, headers):
    """Restore the partial-download flag if the parent reset it, then notify."""
    HTTPDownloader.gotHeaders(self, headers)
    # This hook can fire twice; the first call may lack a content-range,
    # which makes the parent clear requestedPartial.  Put it back when the
    # range header finally shows up.
    if self.requestedPartial == 0 and headers.get('content-range', None):
        self.requestedPartial = self.origPartial
    if self.statusHandler:
        self.statusHandler.onHeaders(self, headers)
def gotHeaders(self, headers):
    """Capture the content encoding, fix up partial state, and notify."""
    HTTPDownloader.gotHeaders(self, headers)
    # Remember the transfer encoding so pageEnd can un-gzip if needed.
    self.encoding = headers.get('content-encoding', None)
    # This hook can fire twice; the first call may lack a content-range,
    # which makes the parent clear requestedPartial.  Restore it here.
    if self.requestedPartial == 0 and headers.get('content-range', None):
        self.requestedPartial = self.origPartial
    if self.statusHandler:
        self.statusHandler.onHeaders(self, headers)
def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
    """Force full (non-partial) downloads and register progress callbacks.

    *writeProgress* may be a single callable or a list of callables.
    """
    HTTPDownloader.__init__(self, url, fileOrName, supportPartial=0, *args, **kwargs)
    # Normalise a single callable into a one-element list for later iteration.
    if writeProgress and not isinstance(writeProgress, list):
        self.writeProgress = [writeProgress]
    else:
        self.writeProgress = writeProgress
    # Progress counters: bytes seen so far / total announced by the server.
    self.currentlength = 0
    self.totallength = None
def gotHeaders(self, headers):
    """Seed progress callbacks with the announced total length on a 200."""
    HTTPDownloader.gotHeaders(self, headers)
    # Only a plain 200 carries a content-length for the whole body.
    if self.writeProgress and self.status == '200':
        if 'content-length' in headers:
            self.totallength = int(headers['content-length'][0])
        for notify in filter(None, self.writeProgress):
            notify(0, self.totallength)
def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
    """Force full (non-partial) downloads and register progress callbacks.

    *writeProgress* may be a single callable or a list of callables.
    """
    HTTPDownloader.__init__(self, url, fileOrName, supportPartial=0, *args, **kwargs)
    # Save callback(s) locally, normalising a bare callable to a list.
    # Fix: isinstance() instead of `type(x) is not list`, so list
    # subclasses are recognised too.
    if writeProgress and not isinstance(writeProgress, list):
        writeProgress = [writeProgress]
    self.writeProgress = writeProgress
    # Initialize progress counters.
    self.currentlength = 0
    self.totallength = None
def gotHeaders(self, headers):
    """Seed progress callbacks with the announced total length on a 200."""
    HTTPDownloader.gotHeaders(self, headers)
    # If we have a callback and 'OK' from the server, try to get the length.
    if self.writeProgress and self.status == '200':
        # Fix: `in` instead of the Python-2-only dict.has_key().
        if 'content-length' in headers:
            self.totallength = int(headers['content-length'][0])
        for cb in self.writeProgress:
            if cb:
                cb(0, self.totallength)
def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
    """Wrap HTTPDownloader and register optional progress callbacks.

    *writeProgress* may be a single callable or a list of callables.
    """
    HTTPDownloader.__init__(self, url, fileOrName, *args, **kwargs)
    # Save callback(s) locally, normalising a bare callable to a list.
    # Fix: isinstance() instead of `type(x) is not list`, so list
    # subclasses are recognised too.
    if writeProgress and not isinstance(writeProgress, list):
        writeProgress = [writeProgress]
    self.writeProgress = writeProgress
    # Initialize progress counters.
    self.currentlength = 0
    self.totallength = None
def pageEnd(self):
    """Finish the transfer, transparently un-gzipping the file if needed."""
    if self.statusHandler:
        self.statusHandler.onEnd(self)
    # And the hacks are piling up, Twisted is really not very flexible:
    # if the server gzip-encoded the body, decompress the file in place
    # before the parent closes out the transfer.
    if self.encoding and self.encoding[0] == 'gzip':
        self.file.close()
        # Fix: the gzip handle was left open on a decompression error; a
        # with-statement guarantees it is closed.
        # NOTE(review): this reads the whole body into memory — it will
        # blow up for large files.
        with gzip.open(self.fileName, 'rb') as compressed:
            decompressed = compressed.read()
        self.file = open(self.fileName, 'wb')
        self.file.write(decompressed)
    HTTPDownloader.pageEnd(self)
def downloadHTTP(url, fileOrName):
    """ Another method to download from HTTP host. """
    # Fix: the `global _UserAgentString` statement was unnecessary (the
    # name is only read, never assigned) and has been dropped; the unused
    # scheme/path results of parse_url are discarded explicitly.
    _scheme, host, port, _path = parse_url(url)
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    if proxy_is_on():
        # Route through the proxy: connect to it and request the full URL.
        host = get_proxy_host()
        port = get_proxy_port()
        factory.path = url
    reactor.connectTCP(host, port, factory)
    return factory.deferred
def downloadHTTP(url, fileOrName):
    """ Another method to download from HTTP host. """
    # Fix: the `global _UserAgentString` statement was unnecessary (the
    # name is only read, never assigned) and has been dropped; the unused
    # scheme/path results of parseurl are discarded explicitly.
    _scheme, host, port, _path = parseurl(url)
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    if proxy_is_on():
        # Route through the proxy: connect to it and request the full URL.
        host = get_proxy_host()
        port = get_proxy_port()
        factory.path = url
    reactor.connectTCP(host, port, factory)
    return factory.deferred
def __init__(self, url, file, statusCallback=None, bucketFilter=None, *args, **kwargs):
    """Partial-capable downloader with a status handler and byte counter."""
    self.bytes_received = 0
    # Fix: initialise `encoding` here — the sibling variant of this class
    # does, and gotHeaders/pageEnd read it; without this a transfer that
    # ends before headers arrive would raise AttributeError.
    self.encoding = None
    self.statusHandler = statusCallback
    self.bucketFilter = bucketFilter
    # TODO: Apparently this only works for servers, not clients :/
    #if self.bucketFilter:
    #    self.protocol = ShapedProtocolFactory(self.protocol, self.bucketFilter)
    HTTPDownloader.__init__(self, url, file, supportPartial=1, agent='Downpour v%s' % VERSION, *args, **kwargs)
    # Remember the original partial flag; gotHeaders restores it when the
    # parent clears it on a range-less first callback.
    self.origPartial = self.requestedPartial
def __init__(self, url, file, statusCallback=None, bucketFilter=None, *args, **kwargs):
    """Partial-capable downloader with a status handler and byte counter."""
    self.bytes_received = 0
    self.encoding = None
    self.statusHandler = statusCallback
    self.bucketFilter = bucketFilter
    # TODO: Apparently this only works for servers, not clients :/
    #if self.bucketFilter:
    #    self.protocol = ShapedProtocolFactory(self.protocol, self.bucketFilter)
    HTTPDownloader.__init__(self, url, file, supportPartial=1, agent='Downpour v%s' % VERSION, *args, **kwargs)
    # Remember the original partial flag; gotHeaders restores it when the
    # parent clears it on a range-less first callback.
    self.origPartial = self.requestedPartial
def _do_download_request(self, document, callback=None, errback=None, **params):
    """Fetch *document* from the API server into a temporary file.

    On a 200 response *callback* receives the temp file's path; on failure
    *errback* receives (status_code, error).  The temp file is always
    removed afterwards.  Returns the downloader's deferred.
    """
    url = '%s/%s?%s' % (self.API_SERVER, document, urllib.urlencode(params))
    headers = self._get_headers()
    file_ = tempfile.NamedTemporaryFile(delete=False)
    downloader = HTTPDownloader(url, file_.name, agent=headers['User-Agent'], headers=headers)

    def _cleanup():
        # Best-effort removal; the file may already be gone.
        try:
            os.remove(file_.name)
        except OSError:
            pass

    def errback_(error):
        if errback is not None:
            code = getattr(downloader, 'status', None)
            errback(code, error)
        # Fix: the temporary file used to leak on the error path.
        _cleanup()

    def callback_(res):
        if getattr(downloader, 'status', None) == '200' and callback:
            callback(file_.name)
        # Remove the temporary file after the callback has handled it
        _cleanup()

    downloader.deferred.addErrback(errback_)
    downloader.deferred.addCallback(callback_)
    parsed = urlparse.urlparse(url)
    reactor.connectTCP(
        parsed.netloc.split(':')[0], parsed.port or 80, downloader)
    return downloader.deferred
def gotHeaders(self, headers):
    """On a 200 response, record content-length and prime progress callbacks."""
    # Only a plain 200 carries a content-length for the whole body.
    if self.writeProgress and self.status == '200':
        if 'content-length' in headers:
            self.totallength = int(headers['content-length'][0])
        for notify in self.writeProgress:
            notify(0, self.totallength)
    return HTTPDownloader.gotHeaders(self, headers)
def gotHeaders(self, headers):
    """On a 200 response, record content-length and prime progress callbacks."""
    # If we have a callback and 'OK' from the server, try to get the length.
    if self.writeProgress and self.status == '200':
        # Fix: `in` instead of the Python-2-only dict.has_key().
        if 'content-length' in headers:
            self.totallength = int(headers['content-length'][0])
        for cb in self.writeProgress:
            cb(0, self.totallength)
    return HTTPDownloader.gotHeaders(self, headers)
def download(url, file, contextFactory=None, *args, **kwargs):
    """Download a remote file from http(s) or ftp.

    @param file: path to file on filesystem, or file-like object.

    See HTTPDownloader to see what extra args can be passed if remote file
    is accessible via http or https. Both Backends should offer
    supportPartial.
    """
    scheme, host, port, path, username, password = _parse(url)
    if scheme == 'ftp':
        if not (username and password):
            username = '******'
            password = '******'
        client = FTPDownloader(
            host,
            port,
            path,
            file,
            username,
            password,
            *args,
            **kwargs
        )
        return client.deferred
    # We force username and password here as we lack a satisfying input method
    if username and password:
        # Fix: b64encode replaces the deprecated encodestring, which inserts
        # a newline every 76 chars — for long credentials .strip() could not
        # remove the embedded newlines and the header was corrupted.
        from base64 import b64encode
        # twisted will crash if we don't rewrite this ;-)
        url = scheme + '://' + host + ':' + str(port) + path
        authHeader = "Basic " + b64encode("%s:%s" % (username, password))
        AuthHeaders = {"Authorization": authHeader}
        # Fix: `in` instead of the Python-2-only dict.has_key().
        if "headers" in kwargs:
            kwargs["headers"].update(AuthHeaders)
        else:
            kwargs["headers"] = AuthHeaders
    factory = HTTPDownloader(url, file, *args, **kwargs)
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def downloadSSL(url, fileOrName, progress_func, certificates_filenames):
    """ Another method to download from HTTPS. Not used at the moment. """
    global _UserAgentString
    from twisted.internet import ssl
    from OpenSSL import SSL  # @UnresolvedImport

    class MyClientContextFactory(ssl.ClientContextFactory):
        """Context factory that verifies the peer against the given CA files."""

        def __init__(self, certificates_filenames):
            self.certificates_filenames = list(certificates_filenames)

        def verify(self, connection, x509, errnum, errdepth, ok):
            # Accept whatever OpenSSL's built-in chain verification decided.
            return ok

        def getContext(self):
            ctx = ssl.ClientContextFactory.getContext(self)
            for cert in self.certificates_filenames:
                # Best effort: skip CA files OpenSSL cannot load.
                # Fix: narrowed from a bare `except:` so KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                try:
                    ctx.load_verify_locations(cert)
                except Exception:
                    pass
            ctx.set_verify(SSL.VERIFY_PEER | SSL.VERIFY_FAIL_IF_NO_PEER_CERT, self.verify)
            return ctx

    scheme, host, port, path = parse_url(url)
    if not isinstance(certificates_filenames, list):
        certificates_filenames = [certificates_filenames, ]
    # Require at least one readable certificate file before connecting.
    if not any(os.path.isfile(cert) and os.access(cert, os.R_OK)
               for cert in certificates_filenames):
        return fail(Exception('no one ssl certificate found'))
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    contextFactory = MyClientContextFactory(certificates_filenames)
    reactor.connectSSL(host, port, factory, contextFactory)  # @UndefinedVariable
    return factory.deferred
def _command_download(self, data):
    """Handle a 'download' command: fetch the referenced opus file from the
    peer, then report success or failure back over the TCP packetizer.

    Returns the download deferred (with reporting callbacks attached).
    """
    reactor = self._context["reactor"]
    session_files = self._context["session_files"]
    audio_id = data["audio_id"]
    partial_url = data["partial_url"]
    # The peer that sent the command also hosts the file.
    ip_address = str(self.transport.getPeer().host)
    url = "http://" + ip_address + partial_url
    file_path = session_files.session_dir / f"{audio_id}.opus"
    log.info(f"Downloading file from {url} to {file_path}")
    url_bytes = url.encode("utf-8")
    url_parsed = URI.fromBytes(url_bytes)
    factory = HTTPDownloader(url_bytes, str(file_path))
    reactor.connectTCP(url_parsed.host, url_parsed.port, factory)
    d = factory.deferred

    def on_success(page):
        # Fix: this parameter was named `data`, shadowing the enclosing
        # command payload — an accident waiting to happen.
        # File downloaded successfully, tell the server.
        result = {
            "command": "update_downloaded",
            "audio_id": audio_id,
            "result": "success"
        }
        result_json = json.dumps(result)
        self._tcp_packetizer.write(result_json)

    def on_error(error):
        # File failed to download successfully, tell the server.
        log.error(f"Failed to download file at '{url}': {error}")
        result = {
            "command": "update_downloaded",
            "audio_id": audio_id,
            "result": "failure",
            "error": str(error)
        }
        result_json = json.dumps(result)
        self._tcp_packetizer.write(result_json)

    d.addCallback(on_success)
    d.addErrback(on_error)
    return d
def start(self):
    """Start downloading self._url to self._local_path via the reactor."""
    g_logger.info("URL: %s" % self._url)
    g_logger.info("Local Path: %s" % self._local_path)
    g_logger.info("Download started.")
    # Fix: removed a leftover `print "test"` debug statement.
    factory = HTTPDownloader(self._url, self._local_path)
    factory.deferred.addErrback(g_logger.error)
    factory.deferred.addCallback(self._downloadComplete)
    # Optional user callback registered by the constructor, if any.
    if hasattr(self, '_callback'):
        factory.deferred.addCallback(self._callback, *self._callback_args, **self._callback_kw)
    if self._url.startswith("https://"):
        from twisted.internet import ssl
        content_factory = ssl.ClientContextFactory()
        # Fix: `timeout` and `bindAddress` were undefined names here and
        # raised NameError on every HTTPS download; rely on the reactor's
        # defaults instead.
        reactor.connectSSL(factory.host, factory.port, factory, content_factory)
    else:
        reactor.connectTCP(factory.host, factory.port, factory)
def downloadSSL(url, fileOrName, progress_func, certificates_filenames):
    """ Another method to download from HTTPS. """
    # Fix: dropped the needless `global _UserAgentString` (read-only use);
    # unused scheme/path results are discarded explicitly.
    _scheme, host, port, _path = parse_url(url)
    # Fix: isinstance(..., list) replaces types.ListType, which no longer
    # exists in Python 3 (and isinstance is the idiom in Python 2 as well).
    if not isinstance(certificates_filenames, list):
        certificates_filenames = [certificates_filenames, ]
    # Require at least one readable certificate file before connecting.
    if not any(os.path.isfile(cert) and os.access(cert, os.R_OK)
               for cert in certificates_filenames):
        return fail(Exception('no one ssl certificate found'))
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    contextFactory = MyClientContextFactory(certificates_filenames)
    reactor.connectSSL(host, port, factory, contextFactory)
    return factory.deferred
def buildProtocol(self, addr):
    """Record how long the connection took, then defer to the parent."""
    elapsed = time.time() - self.value["startTime"]
    self.value["timeToConnect"] = elapsed
    return HTTPDownloader.buildProtocol(self, addr)
def pageStart(self, partialContent):
    """Record time-to-first-byte, then defer to the parent."""
    elapsed = time.time() - self.value["startTime"]
    self.value["timeToFirstByte"] = elapsed
    return HTTPDownloader.pageStart(self, partialContent)
def clientConnectionLost(self, connector, reason):
    """Tell the status handler the transfer stopped, then defer to the parent."""
    if self.statusHandler:
        self.statusHandler.onStop(self)
    HTTPDownloader.clientConnectionLost(self, connector, reason)
def clientConnectionFailed(self, connector, reason):
    """Tell the status handler the connection failed, then defer to the parent."""
    if self.statusHandler:
        self.statusHandler.onError(self)
    HTTPDownloader.clientConnectionFailed(self, connector, reason)
def startedConnecting(self, connector):
    """Remember the connector and announce the connection attempt."""
    self.connector = connector
    if self.statusHandler:
        self.statusHandler.onConnect(self)
    HTTPDownloader.startedConnecting(self, connector)
def pagePart(self, data):
    """Count the bytes in this chunk, then defer to the parent."""
    received = len(data)
    self.value["bytesTransferred"] = self.value["bytesTransferred"] + received
    return HTTPDownloader.pagePart(self, data)
def pageEnd(self):
    """Tell the status handler the page finished, then defer to the parent."""
    if self.statusHandler:
        self.statusHandler.onEnd(self)
    HTTPDownloader.pageEnd(self)
def pageStart(self, partialContent):
    """Let the parent start the page, then notify the status handler."""
    HTTPDownloader.pageStart(self, partialContent)
    if self.statusHandler:
        self.statusHandler.onStart(self, partialContent)
def pageEnd(self):
    """Record the total elapsed time, then defer to the parent."""
    elapsed = time.time() - self.value["startTime"]
    self.value["elapsedTime"] = elapsed
    return HTTPDownloader.pageEnd(self)
def gotHeaders(self, headers):
    """Stash the response headers for later inspection, then defer to the parent."""
    self.response_headers = headers
    HTTPDownloader.gotHeaders(self, headers)
def pagePart(self, data):
    """Hand the chunk to the parent, then notify the status handler."""
    HTTPDownloader.pagePart(self, data)
    if self.statusHandler:
        self.statusHandler.onPart(self, data)