Beispiel #1
0
 def __init__(self,
              url,
              fileOrName,
              method="GET",
              postdata=None,
              cookies=None,
              headers=None,
              agent=None,
              timeout=None):
     """Set up the download and the dictionary of timing statistics.

     ``url``, ``fileOrName``, ``method``, ``postdata``, ``headers`` and
     ``agent`` are forwarded to ``HTTPDownloader.__init__``.  ``cookies``
     and ``timeout`` are only stored on the instance (see the XXX note).

     ``cookies`` defaults to ``None``, in which case every instance gets
     its own empty dict.
     """
     # XXX re-add support for cookies and timeout to HTTPDownloader call
     HTTPDownloader.__init__(self,
                             url,
                             fileOrName,
                             method=method,
                             postdata=postdata,
                             headers=headers,
                             agent=agent)
     # Measurements, all relative to "startTime" (taken right here)
     self.value = {
         "startTime": time.time(),
         "timeToConnect": 0,
         "timeToFirstByte": 0,
         "elapsedTime": 0,
         "bytesTransferred": 0,
     }
     # A literal ``{}`` default would be a single dict shared by every
     # instance created without cookies, so build a fresh one instead.
     self.cookies = {} if cookies is None else cookies
     self.timeout = timeout
Beispiel #2
0
    def pagePart(self, data):
        """Hand the chunk to HTTPDownloader, then report progress.

        When progress callbacks are registered and the response status is
        '200', the running byte count grows by ``len(data)`` and every
        truthy callback is called with ``(currentlength, totallength)``.
        """
        HTTPDownloader.pagePart(self, data)

        # Nothing to report without callbacks or without an 'OK' response
        if not self.writeProgress or self.status != '200':
            return

        self.currentlength += len(data)
        for callback in self.writeProgress:
            if callback:
                callback(self.currentlength, self.totallength)
Beispiel #3
0
    def pagePart(self, data):
        """Store a received chunk and notify the progress listeners.

        The chunk is first passed to ``HTTPDownloader.pagePart``.  Listeners
        are only notified when there are any and the status is '200'; each
        truthy listener receives ``(currentlength, totallength)``.
        """
        HTTPDownloader.pagePart(self, data)

        listeners = self.writeProgress
        if listeners and self.status == '200':
            # Advance the position by the size of this chunk
            self.currentlength += len(data)
            for listener in listeners:
                if not listener:
                    continue
                listener(self.currentlength, self.totallength)
Beispiel #4
0
 def gotHeaders(self, headers):
     """Process response headers and forward them to the status handler.

     If a 'content-range' header is present while ``requestedPartial`` is
     0, ``requestedPartial`` is restored from ``origPartial``.
     """
     HTTPDownloader.gotHeaders(self, headers)
     # This method is being called twice sometimes,
     # first time without a content-range
     if headers.get('content-range') and self.requestedPartial == 0:
         self.requestedPartial = self.origPartial
     handler = self.statusHandler
     if handler:
         handler.onHeaders(self, headers)
Beispiel #5
0
 def gotHeaders(self, headers):
     """Process response headers, remembering the content encoding.

     ``self.encoding`` is set to the 'content-encoding' header value (or
     None).  If a 'content-range' header is present while
     ``requestedPartial`` is 0, ``requestedPartial`` is restored from
     ``origPartial``.  Finally the status handler, if any, is notified.
     """
     HTTPDownloader.gotHeaders(self, headers)
     # This method is being called twice sometimes,
     # first time without a content-range
     self.encoding = headers.get('content-encoding')
     range_header = headers.get('content-range')
     if range_header and self.requestedPartial == 0:
         self.requestedPartial = self.origPartial
     handler = self.statusHandler
     if handler:
         handler.onHeaders(self, headers)
Beispiel #6
0
    def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
        """Create a non-resuming downloader with optional progress callbacks.

        ``writeProgress`` may be a single callback or a list of callbacks;
        a single truthy callback is wrapped in a list.  Remaining arguments
        go to ``HTTPDownloader.__init__`` together with ``supportPartial=0``.
        """
        HTTPDownloader.__init__(self, url, fileOrName, supportPartial=0, *args, **kwargs)

        # Normalise a lone callback into a one-element list
        if writeProgress and not isinstance(writeProgress, list):
            callbacks = [writeProgress]
        else:
            callbacks = writeProgress
        self.writeProgress = callbacks

        # Progress counters; the total stays None until it is known
        self.totallength = None
        self.currentlength = 0
Beispiel #7
0
    def gotHeaders(self, headers):
        """Pass headers to HTTPDownloader and announce the total length.

        With progress callbacks registered and a '200' status, the first
        'content-length' value is stored as ``totallength`` and every
        truthy callback is called with ``(0, totallength)``.
        """
        HTTPDownloader.gotHeaders(self, headers)

        # Only an 'OK' response with registered callbacks is of interest
        if not (self.writeProgress and self.status == '200'):
            return
        if 'content-length' not in headers:
            return

        self.totallength = int(headers['content-length'][0])
        for callback in self.writeProgress:
            if callback:
                callback(0, self.totallength)
Beispiel #8
0
    def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
        """Create a non-resuming downloader with optional progress callbacks.

        ``writeProgress`` may be a single callback or a list of callbacks;
        a single truthy callback is wrapped in a list.  Remaining arguments
        go to ``HTTPDownloader.__init__`` together with ``supportPartial=0``.
        """
        HTTPDownloader.__init__(self, url, fileOrName, supportPartial=0, *args, **kwargs)
        # Save callback(s) locally.  isinstance() also accepts list
        # subclasses, which an exact type() comparison would wrap again.
        if writeProgress and not isinstance(writeProgress, list):
            writeProgress = [writeProgress]
        self.writeProgress = writeProgress

        # Initialize the progress counters; the total is unknown so far
        self.currentlength = 0
        self.totallength = None
Beispiel #9
0
    def gotHeaders(self, headers):
        """Pass headers to HTTPDownloader and announce the total length.

        With progress callbacks registered and a '200' status, the first
        'content-length' value is stored as ``totallength`` and every
        truthy callback is called with ``(0, totallength)``.
        """
        HTTPDownloader.gotHeaders(self, headers)

        # If we have a callback and 'OK' from Server try to get length
        if self.writeProgress and self.status == '200':
            # ``in`` works on Python 2 and 3; has_key() is gone in Python 3
            if 'content-length' in headers:
                self.totallength = int(headers['content-length'][0])
                for cb in self.writeProgress:
                    if cb:
                        cb(0, self.totallength)
Beispiel #10
0
    def __init__(self, url, fileOrName, writeProgress=None, *args, **kwargs):
        """Create a downloader with optional progress callbacks.

        ``writeProgress`` may be a single callback or a list of callbacks;
        a single truthy callback is wrapped in a list.  All remaining
        arguments are forwarded to ``HTTPDownloader.__init__``.
        """
        HTTPDownloader.__init__(self, url, fileOrName, *args, **kwargs)

        # Save callback(s) locally.  isinstance() also accepts list
        # subclasses, which an exact type() comparison would wrap again.
        if writeProgress and not isinstance(writeProgress, list):
            writeProgress = [writeProgress]
        self.writeProgress = writeProgress

        # Initialize the progress counters; the total is unknown so far
        self.currentlength = 0
        self.totallength = None
Beispiel #11
0
 def __init__(self, url, fileOrName, method="GET", postdata=None, cookies=None, headers=None, agent=None, timeout=None):
     """Set up the download and the dictionary of timing statistics.

     ``url``, ``fileOrName``, ``method``, ``postdata``, ``headers`` and
     ``agent`` are forwarded to ``HTTPDownloader.__init__``.  ``cookies``
     and ``timeout`` are only stored on the instance (see the XXX note).

     ``cookies`` defaults to ``None``, in which case every instance gets
     its own empty dict.
     """
     # XXX re-add support for cookies and timeout to HTTPDownloader call
     HTTPDownloader.__init__(self, url, fileOrName, method=method, postdata=postdata, headers=headers, agent=agent)
     # Measurements, all relative to "startTime" (taken right here)
     self.value = {
         "startTime": time.time(),
         "timeToConnect": 0,
         "timeToFirstByte": 0,
         "elapsedTime": 0,
         "bytesTransferred": 0,
     }
     # A literal ``{}`` default would be a single dict shared by every
     # instance created without cookies, so build a fresh one instead.
     self.cookies = {} if cookies is None else cookies
     self.timeout = timeout
Beispiel #12
0
 def pageEnd(self):
     """Finish the download, gunzipping the saved file when needed.

     The status handler (if any) is notified first.  When the first
     'content-encoding' value recorded in ``self.encoding`` is 'gzip',
     the downloaded file is closed, decompressed in memory and rewritten
     in place.  The reopened file object stays in ``self.file`` for
     ``HTTPDownloader.pageEnd`` (presumably closed there -- confirm).
     """
     if self.statusHandler:
         self.statusHandler.onEnd(self)
     # And the hacks are piling up, Twisted is really not very flexible
     if self.encoding and self.encoding[0] == 'gzip':
         self.file.close()
         g = gzip.open(self.fileName, 'rb')
         try:
             # This will blow up for large files
             decompressed = g.read()
         finally:
             # Release the gzip handle even if decompression fails
             g.close()
         self.file = open(self.fileName, 'wb')
         self.file.write(decompressed)
     HTTPDownloader.pageEnd(self)
Beispiel #13
0
def downloadHTTP(url, fileOrName):
    """
    Another method to download from HTTP host.

    Builds an ``HTTPDownloader`` for ``url`` writing to ``fileOrName``,
    connects it over TCP (through the proxy when one is enabled) and
    returns the factory's deferred.
    """
    _scheme, host, port, _path = parse_url(url)
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    if proxy_is_on():
        # Connect to the proxy instead and ask it for the full URL
        host, port = get_proxy_host(), get_proxy_port()
        factory.path = url
    reactor.connectTCP(host, port, factory)
    return factory.deferred
Beispiel #14
0
def downloadHTTP(url, fileOrName):
    """
    Another method to download from HTTP host.

    Creates an ``HTTPDownloader`` for ``url`` that writes to
    ``fileOrName`` and returns its deferred.  With the proxy switched on
    the TCP connection goes to the proxy and the full URL is requested.
    """
    _scheme, target_host, target_port, _path = parseurl(url)
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    if proxy_is_on():
        target_host = get_proxy_host()
        target_port = get_proxy_port()
        # The proxy needs the absolute URL as the request path
        factory.path = url
    reactor.connectTCP(target_host, target_port, factory)
    return factory.deferred
Beispiel #15
0
    def __init__(self, url, file, statusCallback=None, bucketFilter=None, *args, **kwargs):
        """Create a resumable downloader that reports to a status handler.

        ``statusCallback`` is stored as ``statusHandler`` and
        ``bucketFilter`` is stored unchanged.  The remaining arguments go
        to ``HTTPDownloader.__init__`` with ``supportPartial=1`` and a
        'Downpour v<VERSION>' user agent.
        """
        self.statusHandler = statusCallback
        self.bucketFilter = bucketFilter
        self.bytes_received = 0

        # TODO: Apparently this only works for servers, not clients :/
        #if self.bucketFilter:
        #   self.protocol = ShapedProtocolFactory(self.protocol, self.bucketFilter)

        user_agent = 'Downpour v%s' % VERSION
        HTTPDownloader.__init__(self, url, file, supportPartial=1,
                                agent=user_agent, *args, **kwargs)

        # Remember the partial-download setting chosen by the base class
        self.origPartial = self.requestedPartial
Beispiel #16
0
    def __init__(self, url, file, statusCallback=None, bucketFilter=None, *args, **kwargs):
        """Create a resumable downloader that reports to a status handler.

        ``statusCallback`` is stored as ``statusHandler`` and
        ``bucketFilter`` is stored unchanged; ``encoding`` starts as None.
        The remaining arguments go to ``HTTPDownloader.__init__`` with
        ``supportPartial=1`` and a 'Downpour v<VERSION>' user agent.
        """
        self.statusHandler = statusCallback
        self.bucketFilter = bucketFilter
        self.encoding = None
        self.bytes_received = 0

        # TODO: Apparently this only works for servers, not clients :/
        #if self.bucketFilter:
        #   self.protocol = ShapedProtocolFactory(self.protocol, self.bucketFilter)

        user_agent = 'Downpour v%s' % VERSION
        HTTPDownloader.__init__(self, url, file, supportPartial=1,
                                agent=user_agent, *args, **kwargs)

        # Remember the partial-download setting chosen by the base class
        self.origPartial = self.requestedPartial
Beispiel #17
0
    def _do_download_request(self,
                             document,
                             callback=None,
                             errback=None,
                             **params):
        """Download ``document`` from the API server into a temporary file.

        ``params`` are URL-encoded into the query string.  On a '200'
        response ``callback`` (if given) is called with the temporary
        file's path; on failure ``errback`` (if given) is called with
        ``(status, error)``.  The temporary file is removed once the
        callback stage has run.

        Returns the downloader's deferred.
        """
        url = '%s/%s?%s' % (self.API_SERVER, document,
                            urllib.urlencode(params))
        headers = self._get_headers()
        # Only the path is needed (HTTPDownloader is handed the name), so
        # close our own handle right away instead of leaking it.
        file_ = tempfile.NamedTemporaryFile(delete=False)
        file_.close()

        downloader = HTTPDownloader(url,
                                    file_.name,
                                    agent=headers['User-Agent'],
                                    headers=headers)

        def errback_(error):
            if errback is not None:
                code = getattr(downloader, 'status', None)
                errback(code, error)

        def callback_(res):
            try:
                if getattr(downloader, 'status', None) == '200' and callback:
                    callback(file_.name)
            finally:
                # Remove the temporary file after the callback has handled
                # it, even when the callback raised
                os.remove(file_.name)

        # errback_ returns None, so callback_ (and the cleanup in it) also
        # runs after a failed download.
        downloader.deferred.addErrback(errback_)
        downloader.deferred.addCallback(callback_)

        parsed = urlparse.urlparse(url)
        reactor.connectTCP(
            parsed.netloc.split(':')[0], parsed.port or 80, downloader)
        return downloader.deferred
	def gotHeaders(self, headers):
		"""Announce the total length, then delegate to HTTPDownloader.

		With progress callbacks registered, a '200' status and a
		'content-length' header, the first header value is stored as
		``totallength`` and every callback is called with
		``(0, totallength)``.  Returns whatever the base class returns.
		"""
		length_known = (
			self.writeProgress
			and self.status == '200'
			and 'content-length' in headers
		)
		if length_known:
			self.totallength = int(headers['content-length'][0])
			for callback in self.writeProgress:
				callback(0, self.totallength)

		return HTTPDownloader.gotHeaders(self, headers)
Beispiel #19
0
    def gotHeaders(self, headers):
        """Announce the total length, then delegate to HTTPDownloader.

        With progress callbacks registered, a '200' status and a
        'content-length' header, the first header value is stored as
        ``totallength`` and every callback is called with
        ``(0, totallength)``.  Returns whatever the base class returns.
        """
        # If we have a callback and 'OK' from Server try to get length
        if self.writeProgress and self.status == '200':
            # ``in`` works on Python 2 and 3; has_key() is gone in Python 3
            if 'content-length' in headers:
                self.totallength = int(headers['content-length'][0])
                for cb in self.writeProgress:
                    cb(0, self.totallength)

        return HTTPDownloader.gotHeaders(self, headers)
Beispiel #20
0
def download(url, file, contextFactory=None, *args, **kwargs):
	"""Download a remote file from http(s) or ftp.

	@param url: location of the remote file; may embed username/password.
	@param file: path to file on filesystem, or file-like object.
	@param contextFactory: SSL context factory used for https; an
		``ssl.ClientContextFactory`` is created when omitted.
	@return: the ``deferred`` of the FTP or HTTP downloader.

	See HTTPDownloader to see what extra args can be passed if remote file
	is accessible via http or https. Both Backends should offer supportPartial.
	"""
	scheme, host, port, path, username, password = _parse(url)

	if scheme == 'ftp':
		# Fall back to placeholder credentials when the URL has none
		if not (username and password):
			username = '******'
			password = '******'

		client = FTPDownloader(
			host,
			port,
			path,
			file,
			username,
			password,
			*args,
			**kwargs
		)
		return client.deferred

	# We force username and password here as we lack a satisfying input method
	if username and password:
		from base64 import b64encode

		# twisted will crash if we don't rewrite this ;-)
		url = scheme + '://' + host + ':' + str(port) + path

		# b64encode never inserts line breaks; encodestring wraps long
		# output, which would put newlines inside the header value.
		basicAuth = b64encode("%s:%s" % (username, password))
		authHeader = "Basic " + basicAuth.strip()

		# Merge into a copy so the caller's headers dict is left untouched
		headers = dict(kwargs.get("headers") or {})
		headers["Authorization"] = authHeader
		kwargs["headers"] = headers

	factory = HTTPDownloader(url, file, *args, **kwargs)
	if scheme == 'https':
		from twisted.internet import ssl
		if contextFactory is None:
			contextFactory = ssl.ClientContextFactory()
		reactor.connectSSL(host, port, factory, contextFactory)
	else:
		reactor.connectTCP(host, port, factory)

	return factory.deferred
Beispiel #21
0
def downloadSSL(url, fileOrName, progress_func, certificates_filenames):
    """
    Another method to download from HTTPS.
    Not used at the moment.

    ``certificates_filenames`` is a single path or a list of paths to
    certificate files used to verify the peer.  At least one of them must
    be a readable file, otherwise a failed deferred is returned.
    ``progress_func`` is accepted but not used here.

    Returns the deferred of the ``HTTPDownloader`` factory.
    """
    from twisted.internet import ssl
    from OpenSSL import SSL  # @UnresolvedImport

    class MyClientContextFactory(ssl.ClientContextFactory):
        """Context factory that verifies the peer against given files."""

        def __init__(self, certificates_filenames):
            self.certificates_filenames = list(certificates_filenames)

        def verify(self, connection, x509, errnum, errdepth, ok):
            # Accept whatever verdict OpenSSL already reached
            return ok

        def getContext(self):
            ctx = ssl.ClientContextFactory.getContext(self)
            for cert in self.certificates_filenames:
                try:
                    ctx.load_verify_locations(cert)
                except Exception:
                    # Best effort: skip certificates that cannot be
                    # loaded, but let KeyboardInterrupt/SystemExit through
                    pass
            ctx.set_verify(SSL.VERIFY_PEER | SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
                           self.verify)
            return ctx

    scheme, host, port, path = parse_url(url)
    if not isinstance(certificates_filenames, list):
        certificates_filenames = [
            certificates_filenames,
        ]
    # Refuse to start unless at least one certificate file is readable
    cert_found = any(
        os.path.isfile(cert) and os.access(cert, os.R_OK)
        for cert in certificates_filenames
    )
    if not cert_found:
        return fail(Exception('no one ssl certificate found'))

    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    contextFactory = MyClientContextFactory(certificates_filenames)
    reactor.connectSSL(host, port, factory,
                       contextFactory)  # @UndefinedVariable
    return factory.deferred
Beispiel #22
0
    def _command_download(self, data):
        """Download an audio file from the peer and report the outcome.

        The URL is built from the peer's host address and
        ``data["partial_url"]``; the file is saved in the session
        directory as ``<audio_id>.opus``.  When the download ends, an
        "update_downloaded" JSON message with result "success" or
        "failure" is written to the TCP packetizer.

        Returns the download's deferred.
        """
        audio_id = data["audio_id"]
        partial_url = data["partial_url"]
        reactor = self._context["reactor"]
        session_files = self._context["session_files"]

        peer_host = str(self.transport.getPeer().host)
        url = "http://" + peer_host + partial_url
        file_path = session_files.session_dir / f"{audio_id}.opus"

        log.info(f"Downloading file from {url} to {file_path}")

        url_bytes = url.encode("utf-8")
        url_parsed = URI.fromBytes(url_bytes)
        factory = HTTPDownloader(url_bytes, str(file_path))
        reactor.connectTCP(url_parsed.host, url_parsed.port, factory)

        def report(outcome, **extra):
            # Send the download outcome back to the server as JSON
            message = {
                "command": "update_downloaded",
                "audio_id": audio_id,
                "result": outcome,
            }
            message.update(extra)
            self._tcp_packetizer.write(json.dumps(message))

        def on_success(_result):
            # File downloaded successfully, tell the server
            report("success")

        def on_error(error):
            # File failed to download, tell the server
            log.error(f"Failed to download file at '{url}': {error}")
            report("failure", error=str(error))

        d = factory.deferred
        d.addCallback(on_success)
        d.addErrback(on_error)
        return d
Beispiel #23
0
 def start(self):
     """Start downloading ``self._url`` into ``self._local_path``.

     Logs the request, attaches the error logger, the completion handler
     and the optional user callback to the factory's deferred, then
     connects over SSL for "https://" URLs and over plain TCP otherwise.
     """
     g_logger.info("URL: %s" % self._url)
     g_logger.info("Local Path: %s" % self._local_path)
     g_logger.info("Download started.")

     factory = HTTPDownloader(self._url, self._local_path)
     # NOTE(review): the errback is attached first; if g_logger.error
     # returns None the callbacks below also run after a failure --
     # confirm that this is intended.
     factory.deferred.addErrback(g_logger.error)
     factory.deferred.addCallback(self._downloadComplete)
     if hasattr(self, '_callback'):
         factory.deferred.addCallback(self._callback, *self._callback_args,
                                      **self._callback_kw)

     if self._url.startswith("https://"):
         from twisted.internet import ssl
         context_factory = ssl.ClientContextFactory()
         # The former extra arguments ``timeout`` and ``bindAddress`` were
         # undefined names here (NameError); rely on connectSSL defaults.
         reactor.connectSSL(factory.host, factory.port, factory,
                            context_factory)
     else:
         reactor.connectTCP(factory.host, factory.port, factory)
Beispiel #24
0
def downloadSSL(url, fileOrName, progress_func, certificates_filenames):
    """
    Another method to download from HTTPS.

    ``certificates_filenames`` is a single path or a list of paths to
    certificate files handed to ``MyClientContextFactory``.  At least one
    of them must be a readable file, otherwise a failed deferred is
    returned.  ``progress_func`` is accepted but not used here.

    Returns the deferred of the ``HTTPDownloader`` factory.
    """
    scheme, host, port, path = parse_url(url)
    # The builtin ``list`` is the very type ``types.ListType`` named on
    # Python 2, and unlike that alias it still exists on Python 3.
    if not isinstance(certificates_filenames, list):
        certificates_filenames = [
            certificates_filenames,
        ]
    # Refuse to start unless at least one certificate file is readable
    cert_found = any(
        os.path.isfile(cert) and os.access(cert, os.R_OK)
        for cert in certificates_filenames
    )
    if not cert_found:
        return fail(Exception('no one ssl certificate found'))
    factory = HTTPDownloader(url, fileOrName, agent=_UserAgentString)
    contextFactory = MyClientContextFactory(certificates_filenames)
    reactor.connectSSL(host, port, factory, contextFactory)
    return factory.deferred
Beispiel #25
0
 def buildProtocol(self, addr):
     """Record the time needed to connect, then build the protocol.

     ``timeToConnect`` becomes the time elapsed since ``startTime``.
     Returns the protocol built by ``HTTPDownloader.buildProtocol``.
     """
     stats = self.value
     stats["timeToConnect"] = time.time() - stats["startTime"]
     return HTTPDownloader.buildProtocol(self, addr)
Beispiel #26
0
 def pageStart(self, partialContent):
     """Record the time until the body starts, then delegate.

     ``timeToFirstByte`` becomes the time elapsed since ``startTime``.
     Returns the result of ``HTTPDownloader.pageStart``.
     """
     stats = self.value
     stats["timeToFirstByte"] = time.time() - stats["startTime"]
     return HTTPDownloader.pageStart(self, partialContent)
Beispiel #27
0
 def clientConnectionLost(self, connector, reason):
     """Tell the status handler about the stop, then delegate."""
     handler = self.statusHandler
     if handler:
         handler.onStop(self)
     HTTPDownloader.clientConnectionLost(self, connector, reason)
Beispiel #28
0
 def clientConnectionFailed(self, connector, reason):
     """Tell the status handler about the error, then delegate."""
     handler = self.statusHandler
     if handler:
         handler.onError(self)
     HTTPDownloader.clientConnectionFailed(self, connector, reason)
Beispiel #29
0
 def startedConnecting(self, connector):
     """Keep the connector, notify the status handler, then delegate."""
     # Stored on the instance so it stays reachable after this call
     self.connector = connector
     handler = self.statusHandler
     if handler:
         handler.onConnect(self)
     HTTPDownloader.startedConnecting(self, connector)
Beispiel #30
0
 def pagePart(self, data):
     """Count the bytes of this chunk, then delegate to HTTPDownloader.

     Returns the result of ``HTTPDownloader.pagePart``.
     """
     chunk_size = len(data)
     self.value["bytesTransferred"] += chunk_size
     return HTTPDownloader.pagePart(self, data)
Beispiel #31
0
 def pageEnd(self):
     """Tell the status handler the page ended, then delegate."""
     handler = self.statusHandler
     if handler:
         handler.onEnd(self)
     HTTPDownloader.pageEnd(self)
Beispiel #32
0
 def pageStart(self, partialContent):
     """Delegate to HTTPDownloader, then notify the status handler.

     The handler's ``onStart`` receives this factory and
     ``partialContent``.
     """
     HTTPDownloader.pageStart(self, partialContent)
     handler = self.statusHandler
     if handler:
         handler.onStart(self, partialContent)
Beispiel #33
0
 def pageEnd(self):
     """Record the total elapsed time, then delegate to HTTPDownloader.

     ``elapsedTime`` becomes the time elapsed since ``startTime``.
     Returns the result of ``HTTPDownloader.pageEnd``.
     """
     stats = self.value
     stats["elapsedTime"] = time.time() - stats["startTime"]
     return HTTPDownloader.pageEnd(self)
Beispiel #34
0
 def gotHeaders(self, headers):
     """Keep the response headers on the instance, then delegate."""
     # Stored before delegating, so the attribute is already set while
     # the base class processes the headers
     self.response_headers = headers
     HTTPDownloader.gotHeaders(self, headers)
Beispiel #35
0
 def pageEnd(self):
     """Store how long the whole download took, then delegate.

     Returns the result of ``HTTPDownloader.pageEnd``.
     """
     finished_at = time.time()
     self.value["elapsedTime"] = finished_at - self.value["startTime"]
     return HTTPDownloader.pageEnd(self)
Beispiel #36
0
 def gotHeaders(self, headers):
     """Remember the received headers and pass them to HTTPDownloader."""
     # Exposed as ``response_headers`` for later inspection
     self.response_headers = headers
     HTTPDownloader.gotHeaders(self, headers)
Beispiel #37
0
 def pagePart(self, data):
     """Delegate the chunk to HTTPDownloader, then notify the handler.

     The handler's ``onPart`` receives this factory and the chunk.
     """
     HTTPDownloader.pagePart(self, data)
     handler = self.statusHandler
     if handler:
         handler.onPart(self, data)
Beispiel #38
0
 def pagePart(self, data):
     """Add this chunk's size to ``bytesTransferred``, then delegate.

     Returns the result of ``HTTPDownloader.pagePart``.
     """
     stats = self.value
     stats["bytesTransferred"] += len(data)
     return HTTPDownloader.pagePart(self, data)
Beispiel #39
0
 def buildProtocol(self, addr):
     """Store how long connecting took, then build the protocol.

     Returns the protocol built by ``HTTPDownloader.buildProtocol``.
     """
     connected_at = time.time()
     self.value["timeToConnect"] = connected_at - self.value["startTime"]
     return HTTPDownloader.buildProtocol(self, addr)
Beispiel #40
0
 def pageStart(self, partialContent):
     """Store the time until the body started, then delegate.

     Returns the result of ``HTTPDownloader.pageStart``.
     """
     first_byte_at = time.time()
     self.value["timeToFirstByte"] = first_byte_at - self.value["startTime"]
     return HTTPDownloader.pageStart(self, partialContent)