def ooni_main(self):
    """Fetch a fixed URL over a control path and an experiment path.

    The control request goes through the SOCKS proxy named by
    ``self.controlproxy`` when that is set; the experiment request uses the
    plain endpoint. Each result is handed to ``self.cb`` tagged with
    ``'control'`` or ``'experiment'``. The reactor is started at the end.

    Returns None when the URL does not look like ``scheme://host``.
    Raises Exception when ``self.endpoint`` yields nothing for the scheme.
    """
    # We don't have the Command object so cheating for now.
    url = 'http://check.torproject.org/'
    self.controlproxy = 'socks4a://127.0.0.1:9050'
    self.experimentalproxy = ''

    # Raw string: the pattern text is unchanged, but "\:" and "\/" are no
    # longer invalid escape sequences in an ordinary string literal.
    if not re.match(r"[a-zA-Z0-9]+\:\/\/[a-zA-Z0-9]+", url):
        return None

    scheme, host, port, path = client._parse(url)

    ctrl_dest = self.endpoint(scheme, host, port)
    if not ctrl_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    if self.controlproxy:
        _, proxy_host, proxy_port, _ = client._parse(self.controlproxy)
        control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest)
    else:
        control = ctrl_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('control', x))
    control.connect(f)

    exp_dest = self.endpoint(scheme, host, port)
    if not exp_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    # FIXME: use the experiment proxy if there is one
    experiment = exp_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('experiment', x))
    experiment.connect(f)

    reactor.run()
def test_parse(self):
    """
    L{client._parse} splits a URL into (scheme, host, port, path).
    """
    expectations = [
        # The default port for HTTP is 80.
        ('http://127.0.0.1/', ('http', '127.0.0.1', 80, '/')),
        # The default port for HTTPS is 443.
        ('https://127.0.0.1/', ('https', '127.0.0.1', 443, '/')),
        # Specifying a port.
        ('http://spam:12345/', ('http', 'spam', 12345, '/')),
        # Weird (but commonly accepted) structure uses default port.
        ('http://spam:/', ('http', 'spam', 80, '/')),
        # Spaces in the hostname are trimmed, the default path is /.
        ('http://foo ', ('http', 'foo', 80, '/')),
    ]
    for url, parsed in expectations:
        self.assertEqual(client._parse(url), parsed)
def ooni_main(self, cmd): # We don't have the Command object so cheating for now. url = cmd.hostname # FIXME: validate that url is on the form scheme://host[:port]/path scheme, host, port, path = client._parse(url) ctrl_dest = self.endpoint(scheme, host, port) if not ctrl_dest: raise Exception('unsupported scheme %s in %s' % (scheme, url)) if cmd.controlproxy: assert scheme != 'https', "no support for proxied https atm, sorry" _, proxy_host, proxy_port, _ = client._parse(cmd.controlproxy) control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest) print "proxy: ", proxy_host, proxy_port else: control = ctrl_dest f = client.HTTPClientFactory(url) f.deferred.addCallback(lambda x: self.cb('control', x)) control.connect(f) exp_dest = self.endpoint(scheme, host, port) if not exp_dest: raise Exception('unsupported scheme %s in %s' % (scheme, url)) # FIXME: use the experiment proxy if there is one experiment = exp_dest f = client.HTTPClientFactory(url) f.deferred.addCallback(lambda x: self.cb('experiment', x)) experiment.connect(f) reactor.run()
def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
    """Create the download factory for ``url`` and start connecting.

    :param url: URL to download; its scheme selects SSL or plain TCP.
    :param outputfile: passed through to HTTPProgressDownloader.
    :param contextFactory: SSL context factory for https URLs; a default
        ``ssl.ClientContextFactory()`` is created when omitted.
    Extra positional/keyword arguments go to HTTPProgressDownloader.
    """
    scheme, host, port, path = client._parse(url)
    self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
    if scheme == "https":
        # Honour the caller's context factory; it used to be ignored and a
        # fresh default one was always created.
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        self.connection = reactor.connectSSL(
            host, port, self.factory, contextFactory)
    else:
        self.connection = reactor.connectTCP(host, port, self.factory)
def getPageFactory(url, contextFactory=None, *args, **kwargs):
    """Start fetching ``url`` and return the HTTPClientFactory doing it.

    :param url: URL to fetch; must be shorter than 16384 characters.
    :param contextFactory: SSL context factory for https URLs; a default
        one is created when omitted.
    :raises ValueError: when the URL is too long.
    Extra arguments are forwarded to ``client.HTTPClientFactory``.
    """
    def failedConnect(reason, factory):
        # Pass the failure through unchanged. The earlier version probed
        # factory.status inside a bare ``except`` but returned ``reason``
        # on every path, so this is equivalent without the swallow.
        return reason

    if len(url) >= 16384:
        raise ValueError(
            "Too much data sent: twisted server doesn't appear to"
            " support urls longer than 16384")

    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)
    factory.deferred.addErrback(failedConnect, factory)

    # Jittered connect timeout.
    to = CONNECT_TO + random.randrange(2 + CONNECT_TO_VAR) - CONNECT_TO_VAR
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        # Apply the same connect timeout as the plain TCP branch; it was
        # previously computed but not used for SSL connections.
        reactor.connectSSL(host, port, factory, contextFactory, timeout=to)
    else:
        reactor.connectTCP(host, port, factory, timeout=to)
    return factory
def downloadPageFactory(url, file, progressCallback=None,
                        agent="BitTorrent client", bindAddress=None,
                        contextFactory=None):
    """Start downloading ``url`` into ``file`` and return the factory.

    @param file: path to file on filesystem, or file-like object.
    """
    scheme, host, port, path = client._parse(url)
    factory = ProgressHTTPDownloader(
        url, file,
        progressCallback=progressCallback,
        agent=agent,
        supportPartial=0)

    if scheme != 'https':
        reactor.connectTCP(host, port, factory, bindAddress=bindAddress)
        return factory

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, contextFactory,
                       bindAddress=bindAddress)
    return factory
def getPage(url, contextFactory=None, *args, **kwargs):
    """Log the outgoing request, start it, and return its Deferred.

    The returned Deferred has ``_checkCacheControl`` as a callback and an
    errback that logs the failure and passes it on.
    """
    def _eb(failure):
        log.msg('Failed.')
        log.msg(failure)
        return failure

    log.msg('Method: %s' % kwargs.get('method', 'GET'))
    log.msg('URI: %s' % url)
    if 'headers' in kwargs:
        log.msg('Headers: %r' % kwargs['headers'])
    if 'postdata' in kwargs:
        log.msg('Payload: %r' % kwargs['postdata'])

    scheme, host, port, path = client._parse(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    if scheme != 'https':
        reactor.connectTCP(host, port, factory)
    else:
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)

    d = factory.deferred.addCallback(_checkCacheControl)
    return d.addErrback(_eb)
def request(self, method, uri, headers=None, bodyProducer=None):
    """Issue a request against ``self.root`` over an in-memory loopback.

    A ``host`` header is added when the caller did not supply one. The
    returned Deferred is the client protocol's response with
    ``self._done`` attached via addBoth.
    """
    target = client._parse(uri)
    peer = address.IPv4Address('TCP', target.host, target.port)

    # ripped from _AgentBase._requestWithEndpoint
    if headers is None:
        headers = Headers()
    if not headers.hasHeader('host'):
        headers = headers.copy()
        host_value = self._computeHostValue(
            target.scheme, target.host, target.port)
        headers.addRawHeader('host', host_value)

    outgoing = client.Request(method, target.path, headers, bodyProducer,
                              persistent=False)
    protocol = ClientProtocol(outgoing)

    # ouch
    self.root.putChild('', self.root)
    server = Site(self.root).buildProtocol(self.addr)
    loopbackAsync(server, protocol, peer, self.addr)
    return protocol.response.addBoth(self._done, protocol)
def getPage(url, factoryFactory = client.HTTPClientFactory,
            proxyFactoryFactory = ProxyHTTPClientFactory, *args, **kwargs):
    """Fetch ``url`` through the module-level proxy settings, if any.

    Without ``proxy_host``/``proxy_port`` this defers to
    ``client.getPage``. https URLs are tunnelled with
    ``https.ProxyHTTPSConnectionFactory``; other URLs are sent to the proxy
    with a ``Proxy-Authorization`` header when credentials are configured.

    :returns: the Deferred of the factory performing the request.
    """
    if not proxy_host or not proxy_port:
        # No proxy information - default behaviour
        return client.getPage(url, *args, **kwargs)

    scheme, host, port, path = client._parse(url)
    if scheme == 'https':
        # Proxy and HTTPS - connect via the tunnelling factory
        http_factory = factoryFactory(url, followRedirect = 0,
                                      *args, **kwargs)
        https_factory = https.ProxyHTTPSConnectionFactory(
            http_factory, host, port, True, proxy_user, proxy_pass)
        reactor.connectTCP(proxy_host, proxy_port, https_factory)
        return http_factory.deferred

    # Work on a copy so the caller's headers dict is not modified, and
    # tolerate headers=None.
    headers = dict(kwargs.get('headers') or {})
    if proxy_user and proxy_pass:
        # b64encode never inserts line breaks; encodestring wraps its
        # output every 76 characters, which corrupts the header value for
        # long credentials.
        auth = base64.b64encode("%s:%s" % (proxy_user, proxy_pass))
        headers['Proxy-Authorization'] = 'Basic %s' % (auth,)
    kwargs['headers'] = headers

    factory = proxyFactoryFactory(url, proxy_host, proxy_port,
                                  followRedirect = 0, *args, **kwargs)
    reactor.connectTCP(proxy_host, proxy_port, factory)
    return factory.deferred
def _getPage(self, req):
    """Start the HTTP request described by the ``req`` mapping.

    Returns the factory's Deferred with ``self._getPageComplete`` and
    ``self._getPageError`` attached (each also receives the factory).
    """
    url = req['url']
    scheme, host, port = _parse(url)[0:3]
    factory = HTTPClientFactory(
        url,
        method=req['method'],
        postdata=req['postdata'],
        headers=req['headers'],
        agent=req['agent'],
        timeout=req['timeout'],
        cookies=req['cookies'],
        followRedirect=req['follow_redirect']
    )

    if scheme != 'https':
        reactor.connectTCP(host, port, factory, timeout=req['timeout'])
    else:
        reactor.connectSSL(
            host, port, factory,
            AllCipherSSLClientContextFactory(),
            timeout=req['timeout']
        )

    d = factory.deferred
    d.addCallback(self._getPageComplete, factory)
    d.addErrback(self._getPageError, factory)
    return d
def _httpRequest(url, soap_action, soap_envelope, timeout=DEFAULT_TIMEOUT,
                 ctx_factory=None):
    """POST a SOAP envelope to ``url``.

    :returns: a ``(deferred, factory)`` pair. On early failure the pair is
        ``(failed deferred, None)``: when ``url`` is not a ``str``, or when
        the URL is https and no ``ctx_factory`` was given.
    """
    # copied from twisted.web.client in order to get access to the
    # factory (which contains response codes, headers, etc)

    # isinstance rather than an exact type comparison, so str subclasses
    # are accepted as well.
    if not isinstance(url, str):
        e = RequestError('URL must be string, not %s' % type(url))
        return defer.fail(e), None

    log.msg(" -- Sending payload --\n%s\n -- End of payload --" % soap_envelope,
            system=LOG_SYSTEM, payload=True)

    scheme, host, port, _ = twclient._parse(url)

    factory = twclient.HTTPClientFactory(url, method='POST',
                                         postdata=soap_envelope,
                                         timeout=timeout)
    factory.noisy = False  # stop spewing about factory start/stop

    # fix missing port in header (bug in twisted.web.client)
    if port:
        factory.headers['host'] = host + ':' + str(port)

    factory.headers['Content-Type'] = 'text/xml'  # CXF will complain if this is not set
    factory.headers['soapaction'] = soap_action
    # NOTE(review): hard-coded credential committed to source; it should
    # come from configuration. Left unchanged to preserve behaviour.
    factory.headers['Authorization'] = 'Basic bnNpZGVtbzpSaW9QbHVnLUZlc3QyMDExIQ=='  # base64.b64encode('nsidemo:RioPlug-Fest2011!')

    if scheme == 'https':
        if ctx_factory is None:
            return defer.fail(RequestError('Cannot perform https request without context factory')), None
        reactor.connectSSL(host, port, factory, ctx_factory)
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred, factory
def testParse(self):
    """client._parse yields the expected host, port and path parts."""
    # assertEqual replaces the deprecated assertEquals alias.
    scheme, host, port, path = client._parse("http://127.0.0.1/")
    self.assertEqual(path, "/")
    self.assertEqual(port, 80)

    scheme, host, port, path = client._parse("https://127.0.0.1/")
    self.assertEqual(path, "/")
    self.assertEqual(port, 443)

    scheme, host, port, path = client._parse("http://spam:12345/")
    self.assertEqual(port, 12345)

    scheme, host, port, path = client._parse("http://foo ")
    self.assertEqual(host, "foo")
    self.assertEqual(path, "/")

    scheme, host, port, path = client._parse("http://egg:7890")
    self.assertEqual(port, 7890)
    self.assertEqual(host, "egg")
    self.assertEqual(path, "/")
def testFactoryInfo(self):
    """Fetch the 'file' URL over SSL and check the factory afterwards."""
    target = self.getURL('file')
    _scheme, host, port, _path = client._parse(target)
    factory = client.HTTPClientFactory(target)
    context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, context)
    # The base class defines _cbFactoryInfo correctly for this
    d = factory.deferred
    return d.addCallback(self._cbFactoryInfo, factory)
def getPageWithHeaders(url, *args, **kwargs):
    """Start a plain-TCP fetch of ``url`` and return the factory.

    Basically a clone of client.getPage, but the factory itself is
    returned so the caller can pull the headers from it later.
    """
    _scheme, host, port, _path = client._parse(url)
    page_factory = client.HTTPClientFactory(url, *args, **kwargs)
    reactor.connectTCP(host, port, page_factory)
    return page_factory
def startNextConnection(self, request):
    """Connect a ShoutTransfer to the next URL, cycling through self.urls."""
    position = self.urlpos
    url = self.urls[position]
    # Advance round-robin so the following call uses the next URL.
    self.urlpos = (position + 1) % len(self.urls)

    _scheme, host, port, _path = _parse(url)
    creator = protocol.ClientCreator(
        reactor, ShoutTransfer, request, self.startNextConnection)
    creator.connectTCP(host, port)
def __init__(self, url, contextFactory=None, retries=0): url = stripNoPrint(url) if retries > 0: print "Retrying: ", url else: print "Get: ", url self.retries = retries self.url = url self.charset = None scheme, host, port, path = _parse(url) HTTPClientFactory.__init__(self, url, method='GET', postdata=None, headers=None, agent='Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US;' + ' rv:1.9.2.10) Gecko/20100914 Firefox/3.6.10') if scheme == 'https': from twisted.internet import ssl if contextFactory is None: contextFactory = ssl.ClientContextFactory() reactor.connectSSL(host, port, self, contextFactory) else: reactor.connectTCP(host, port, self) self.deferred.addCallbacks(self.getCharset, self.Err) self.deferred.addCallbacks(self.getTitle, self.Err)
def getPage(url, bindAddress = None, *arg, **kw):
    """Fetch ``url``, binding the outgoing connection to ``bindAddress``.

    Reimplemented here to insert bindAddress. Returns the factory's
    Deferred.
    """
    if not hasattr(client, '_parse'):
        # _parse() deprecated in twisted 13.1.0 in favor of the _URI class,
        # which was renamed to URI in 15.0.0.
        try:
            from twisted.web.client import _URI as URI
        except ImportError:
            from twisted.web.client import URI
        parsed = URI.fromBytes(url)
        scheme, host, port, path = (
            parsed.scheme, parsed.host, parsed.port, parsed.path)
    else:
        scheme, host, port, path = client._parse(url)

    factory = HTTPClientFactory(url, *arg, **kw)
    factory.noisy = False

    if scheme != 'https':
        reactor.connectTCP(host, port, factory, bindAddress = bindAddress)
    else:
        from twisted.internet import ssl
        context = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, context,
                           bindAddress = bindAddress)
    return factory.deferred
def __init__(self, jobId, jobSpec):
    # Set up a job: initialise defaults, overwrite them from jobSpec,
    # queue one entry per request URL, and record the job in the database.
    self.jobId = jobId
    self.jobSpec = jobSpec

    # attributes which configure the engine
    # (several of these defaults are overwritten from jobSpec below)
    self.clientFunction = lambda self, t: 1
    self.requests = {"":{}}
    self.userAgent = str("thundercloud client/%s" % constants.VERSION)
    self.iterator = lambda: True
    self.httpClientRequestQueue = Queue()
    self.jobState = JobState.NEW
    self.timeout = 10

    # attributes for time management
    self.duration = float("inf") #60
    self.startTime = None
    self.endTime = None
    self.elapsedTime = 0.00000001 # so clientFunction(0) != 0
    self.pausedTime = 0.0
    self._timeAtPause = 0.0

    # attributes for data management
    self.bytesTransferred = 0
    self.transferLimit = float("inf")

    # attributes for statistics generation
    self.iterations = 0
    self.requestsCompleted = 0
    self.requestsFailed = 0
    # deep copies, so this job does not share the JobResults containers
    self.errors = copy.deepcopy(JobResults().results_errors)
    self.statisticsByTime = copy.deepcopy(JobResults().results_byTime)
    self._averageTimeToConnect = 0
    self._averageTimeToFirstByte = 0
    self._averageResponseTime = 0
    self.statsInterval = 60
    self._statsBookmark = 0 # shortcut to last time stats were generated.
                            # avoids listing/sorting statisticsByTime keys

    # read the job spec and update attributes
    self.requests = jobSpec.requests
    self.transferLimit = jobSpec.transferLimit
    self.duration = jobSpec.duration
    self.userAgent = jobSpec.userAgent
    self.statsInterval = jobSpec.statsInterval
    self.timeout = jobSpec.timeout
    # NOTE(review): eval() runs jobSpec.clientFunction as a Python
    # expression (with ``t`` in scope) on every call; this is only safe if
    # job specs come from a trusted source -- confirm.
    self.clientFunction = lambda t: eval(jobSpec.clientFunction)

    # dump the host/port/URLs to be fetched into a queue
    for url in self.requests.keys():
        scheme, host, port, path = _parse(str(url))
        # queue entry layout: [host, port, method, url, postdata, cookies]
        self.httpClientRequestQueue.put([host, port,
                                         str(self.requests[url]["method"]),
                                         str(url),
                                         self.requests[url]["postdata"],
                                         self.requests[url]["cookies"]])

    # persist the job and its initial (zeroed) accounting row
    db.execute("INSERT INTO jobs (id, startTime, spec) VALUES (?, ?, ?)",
               (self.jobId, datetime.datetime.now(), self.jobSpec))
    db.execute("INSERT INTO accounting (job, elapsedTime, bytesTransferred) VALUES (?, ?, ?)",
               (self.jobId, 0, 0))
def getPage(url, bindAddress=None, *arg, **kw):
    """Fetch ``url`` with the outgoing socket bound to ``bindAddress``.

    Reimplemented here to insert bindAddress. Returns the factory's
    Deferred.
    """
    if hasattr(client, '_parse'):
        parts = client._parse(url)
    else:
        # _parse() deprecated in twisted 13.1.0 in favor of the _URI class;
        # _URI class renamed to URI in 15.0.0
        try:
            from twisted.web.client import _URI as URI
        except ImportError:
            from twisted.web.client import URI
        uri = URI.fromBytes(url)
        parts = (uri.scheme, uri.host, uri.port, uri.path)
    scheme, host, port, path = parts

    factory = HTTPClientFactory(url, *arg, **kw)
    factory.noisy = False

    if scheme == 'https':
        from twisted.internet import ssl
        reactor.connectSSL(host, port, factory, ssl.ClientContextFactory(),
                           bindAddress=bindAddress)
        return factory.deferred

    reactor.connectTCP(host, port, factory, bindAddress=bindAddress)
    return factory.deferred
def get_page(self, headers=None, contextFactory=None, *args, **kwargs):
    """Start fetching ``self.url``, via the proxy when ``self.use_proxy``.

    :param headers: optional mapping of extra request headers; values are
        encoded to UTF-8 before being set on the factory.
    :param contextFactory: SSL context factory for https URLs; a default
        one is created when omitted.

    Errors raised while starting the connection are logged, not raised.
    NOTE(review): nothing is returned, so callers cannot reach the
    factory's Deferred -- confirm that is intended.
    """
    scheme, _, _, _ = txwebclient._parse(self.url)
    factory = txwebclient.HTTPClientFactory(self.url)
    # ``headers`` defaults to None rather than a shared mutable dict.
    for k, v in (headers or {}).items():
        factory.headers[k] = v.encode("utf-8")

    try:
        if self.use_proxy:
            host, port = self.proxy_host, self.proxy_port
        else:
            host, port = self.host, self.port

        if scheme == 'https':
            from twisted.internet import ssl
            if contextFactory is None:
                contextFactory = ssl.ClientContextFactory()
            reactor.connectSSL(host, port, factory, contextFactory)
        else:
            reactor.connectTCP(host, port, factory)
    except Exception as ex:
        code = getattr(ex, 'status', None)
        # Not every exception carries ``.message``; fall back to the
        # exception itself.
        log.error('return code: %s, msg: %s', code,
                  getattr(ex, 'message', ex))
def getPage(url, contextFactory=None, *args, **kwargs):
    """Fetch ``url`` quietly and return the factory's Deferred.

    Extra arguments are passed to HTTPClientFactory.

    @note: This function taken from Twisted source code
        (twisted/web/client.py) and modified so that it wouldn't be noisy.
        Twisted source code is BSD licensed.
    """
    scheme, host, port, path = http_client._parse(url)
    factory = http_client.HTTPClientFactory(url, *args, **kwargs)
    # CASTDOT-CUSTOM: make it so this function is not noisy
    factory.noisy = False

    if scheme != "https":
        reactor.connectTCP(host, port, factory)  # IGNORE:E1101
        return factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, contextFactory)  # IGNORE:E1101
    return factory.deferred
def downloadToFile(url, file, contextFactory=None, *args, **kwargs):
    """Quietly download ``url`` into ``file``; return the factory's Deferred.

    @param file: path to file on filesystem, or file-like object.

    Extra arguments are passed to HTTPDownloader.

    @note: This function taken from downloadToPage function in Twisted
        source code (twisted/web/client.py) and modified so that it
        wouldn't be noisy. Twisted source code is BSD licensed.
    """
    message = (
        'Making HTTP request to "%s" -- downloading returned data to "%s"...'
        % (url, file)
    )
    log.msg(message, lvl="d2", ss="ss_webreq")

    scheme, host, port, path = http_client._parse(url)
    factory = http_client.HTTPDownloader(url, file, *args, **kwargs)
    # CASTDOT-CUSTOM: make it so this function is not noisy
    factory.noisy = False

    if scheme != "https":
        reactor.connectTCP(host, port, factory)  # IGNORE:E1101
        return factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, contextFactory)  # IGNORE:E1101
    return factory.deferred
def _RestApiClient__request(cls, url, method, postdata=None, cookies=None,
                            timeout=None, credentials=None):
    """Start a REST request over plain TCP and return the factory.

    :param url: target URL; repeated slashes in its path are collapsed.
    :param method: HTTP method, passed to HTTPClientFactory.
    :param postdata: optional object, JSON-encoded before sending.
    :param cookies: optional cookie dict; a fresh dict is used when omitted.
    :param timeout: passed to HTTPClientFactory.
    :param credentials: optional (user, password) pair sent as Basic auth.
    """
    # A shared ``{}`` default was handed to the factory, which stores the
    # dict it is given; create a new one per call instead.
    if cookies is None:
        cookies = {}

    extraHeaders = {}
    if postdata is not None:
        postdata = json.dumps(postdata)
    if credentials is not None:
        cred = "%s:%s" % (credentials[0], credentials[1])
        extraHeaders["Authorization"] = "Basic " + base64.encodestring(cred).replace('\012','')

    scheme, host, port, path = _parse(str(url))

    # replace multiple slashes in the url to a single slash
    # in the name of genericism this might be a bad idea but
    # whatever
    while "//" in path:
        path = path.replace("//", "/")
    # startswith() is also safe on an empty path, unlike path[0].
    if path.startswith("/"):
        path = path[1:]

    url = str("%s://%s:%s/%s" % (scheme, host, port, path))
    log.debug("REST API Client request: %s %s" % (method, url))

    factory = HTTPClientFactory(url, method=method, postdata=postdata,
                                cookies=cookies, timeout=timeout,
                                headers=extraHeaders)
    reactor.connectTCP(host, port, factory)
    return factory
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Send a request through the configured proxy.

    The connection comes from ``self._connect``; once it is up, the
    request is issued with the full URI as its path. A ``host`` header is
    added when the caller did not provide one.
    """
    if version < Version('twisted', 13, 1, 0):
        scheme, host, port, path = _parse(uri)
    else:
        # NOTE(review): confirm the classmethod name; Twisted's URI class
        # exposes ``fromBytes``.
        parsed_uri = _URI.getFromBytes(uri)
        scheme, host, port = (
            parsed_uri.scheme, parsed_uri.host, parsed_uri.port)

    request_path = uri
    d = self._connect(scheme, host, port)

    if headers is None:
        headers = Headers()
    if not headers.hasHeader('host'):
        # This is a lot of copying.  It might be nice if there were a bit
        # less.
        headers = Headers(dict(headers.getAllRawHeaders()))
        headers.addRawHeader(
            'host', self._computeHostValue(scheme, host, port))

    def cbConnected(proto):
        # NOTE: For the proxy case the path should be the full URI.
        outgoing = Request(method, request_path, headers, bodyProducer)
        return proto.request(outgoing)

    d.addCallback(cbConnected)
    return d
def download(self, url, fakeoutfile, outputfile, *args, **kwargs):
    """Start an HTTP download of ``url`` and return the factory's Deferred.

    Only plain ``http`` is handled. For https, an unsupported scheme, or a
    URL that fails to parse, a message is written, ``self.exit()`` is
    called and None is returned.
    """
    try:
        scheme, host, port, path = client._parse(url)
        if scheme == 'https':
            self.writeln('Sorry, SSL not supported in this release')
            self.exit()
            return None
        elif scheme != 'http':
            # Builtin name; the Python 2-only ``exceptions`` module is not
            # needed.
            raise NotImplementedError
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt and
        # SystemExit are no longer reported as an unsupported scheme.
        self.writeln('%s: Unsupported scheme.' % (url, ))
        self.exit()
        return None

    self.writeln('--%s--  %s' % (time.strftime('%Y-%m-%d %H:%M:%S'), url))
    self.writeln('Connecting to %s:%d... connected.' % (host, port))
    self.write('HTTP request sent, awaiting response... ')

    factory = HTTPProgressDownloader(self, fakeoutfile, url, outputfile,
                                     *args, **kwargs)
    # Optional local address for the outgoing connection, from config.
    out_addr = None
    if self.honeypot.env.cfg.has_option('honeypot', 'out_addr'):
        out_addr = (self.honeypot.env.cfg.get('honeypot', 'out_addr'), 0)

    self.connection = reactor.connectTCP(host, port, factory,
                                         bindAddress=out_addr)
    return factory.deferred
def download(self, url, fakeoutfile, outputfile, *args, **kwargs):
    """Begin downloading ``url`` over HTTP; return the factory's Deferred.

    Returns None, after writing a message and calling ``self.exit()``,
    when the scheme is https, is not http, or the URL cannot be parsed.
    """
    try:
        scheme, host, port, path = client._parse(url)
        if scheme == 'https':
            self.writeln('Sorry, SSL not supported in this release')
            self.exit()
            return None
        elif scheme != 'http':
            # The builtin works everywhere; ``exceptions`` exists only on
            # Python 2.
            raise NotImplementedError
    except Exception:
        # ``except Exception`` instead of a bare ``except``: interpreter
        # exit signals must not be turned into this message.
        self.writeln('%s: Unsupported scheme.' % (url,))
        self.exit()
        return None

    self.writeln('--%s--  %s' % (time.strftime('%Y-%m-%d %H:%M:%S'), url))
    self.writeln('Connecting to %s:%d... connected.' % (host, port))
    self.write('HTTP request sent, awaiting response... ')

    factory = HTTPProgressDownloader(
        self, fakeoutfile, url, outputfile, *args, **kwargs)

    # Bind to the configured outgoing address when one is set.
    out_addr = None
    if self.honeypot.env.cfg.has_option('honeypot', 'out_addr'):
        out_addr = (self.honeypot.env.cfg.get('honeypot', 'out_addr'), 0)

    self.connection = reactor.connectTCP(
        host, port, factory, bindAddress=out_addr)
    return factory.deferred
def _request(self, method, url, *a, **kw):
    """Issue ``method`` on ``url`` and return the processed Deferred.

    A URL without an http/https prefix is joined onto ``self._base_url``;
    when there is no base URL, a Deferred wrapping ``Response(url)`` is
    returned without any network activity. Extra arguments go to
    ``self._client_factory``.
    """
    if not url or not url.startswith(("http://", "https://")):
        if not self._base_url:
            # maybeDeferred invokes the callable with no arguments; the
            # previous one-argument lambda always failed with TypeError.
            return defer.maybeDeferred(lambda: Response(url))
        url = urllib.basejoin(self._base_url, url or "")

    _scheme, _host, _port, _path = client_http._parse(url)

    kw["method"] = method
    _factory = self._client_factory(url, *a, **kw)

    if _scheme == "https":
        from twisted.internet import ssl
        # A fresh default context factory is always used here.
        _contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(_host, _port, _factory, _contextFactory)
    else:
        reactor.connectTCP(_host, _port, _factory)

    return _factory.deferred.addCallback(
        self._cb_request, _factory, url,
    ).addCallback(
        self._cb_request_debug,
    )
def getPagePrxoy(url, proxy=None, contextFactory=None, *args, **kwargs):
    '''
    Fetch ``url`` directly, or via ``proxy`` when one is given.

    proxy is a mapping with "host" and "port" keys, e.g.
    { host:192.168.1.111, port:6666 }. The factory timeout is always set
    to 60. Returns the factory's Deferred.
    '''
    kwargs["timeout"] = 60

    if proxy is not None:
        # Send the request to the proxy address over plain TCP.
        proxied = client.HTTPClientFactory(url, *args, **kwargs)
        reactor.connectTCP(proxy["host"], proxy["port"], proxied)
        return proxied.deferred

    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)
    if scheme != b'https':
        reactor.connectTCP(client.nativeString(host), port, factory)
        return factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(client.nativeString(host), port, factory,
                       contextFactory)
    return factory.deferred
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Send a request through the configured proxy.

    Connects with ``self._connect`` and, once connected, issues the
    request using the full URI as the request path. A ``host`` header is
    added when the caller did not provide one.
    """
    scheme, host, port, _path = _parse(uri)
    request_path = uri

    connecting = self._connect(scheme, host, port)

    if headers is None:
        headers = Headers()
    if not headers.hasHeader('host'):
        # This is a lot of copying.  It might be nice if there were a bit
        # less.
        headers = Headers(dict(headers.getAllRawHeaders()))
        host_value = self._computeHostValue(scheme, host, port)
        headers.addRawHeader('host', host_value)

    def cbConnected(proto):
        # NOTE: For the proxy case the path should be the full URI.
        outgoing = Request(method, request_path, headers, bodyProducer)
        return proto.request(outgoing)

    connecting.addCallback(cbConnected)
    return connecting
def request(rq):
    """Turn ``rq`` into a (method, url, headers, body producer) tuple.

    Each header value is wrapped in a one-element list, ``Host`` is set
    from the URL's host, and the body producer is None when ``rq.data``
    is empty.
    """
    raw_headers = {}
    for name, value in rq.headers.iteritems():
        raw_headers[name] = [value]
    headers = Headers(raw_headers)

    _scheme, host, _port, _path = _parse(rq.url)
    headers.setRawHeaders('Host', [host])

    if rq.data:
        return (rq.method, rq.url, headers, StringProducer(rq.data))
    return (rq.method, rq.url, headers, None)
def getPageFactory(url, agent="BitTorrent client", bindAddress=None,
                   contextFactory=None, proxy=None, timeout=120):
    """Start fetching ``url`` and return the factory handling it.

    When ``proxy`` (a ``"host:port"`` string) is given, the connection is
    made to that address instead of the URL's own host. ``timeout`` and
    ``bindAddress`` apply to the connection attempt.
    """
    scheme, host, port, path = client._parse(url)
    if proxy:
        proxy_host, proxy_port = proxy.split(':')
        host, port = proxy_host, int(proxy_port)

    factory = HTTPProxyUnGzipClientFactory(url, agent=agent, proxy=proxy)
    connect_options = dict(bindAddress=bindAddress, timeout=timeout)

    if scheme != 'https':
        reactor.connectTCP(host, port, factory, **connect_options)
        return factory

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, contextFactory,
                       **connect_options)
    return factory
def test_connects_right(self):
    """Fetching through the tunnel client returns the sample content."""
    tunnel = TunnelClient("0.0.0.0", self.tunnel_server.port, self.cookie)
    page_factory = client.HTTPClientFactory(self.dest_url)
    _scheme, host, port, _path = client._parse(self.dest_url)
    tunnel.connectTCP(host, port, page_factory)
    body = yield page_factory.deferred
    self.assertEqual(body, SAMPLE_CONTENT)
def download_file(url, filename, callback=None, headers=None,
                  force_filename=False, allow_compression=True):
    """
    Start downloading ``url`` to ``filename`` and return a Deferred.

    :param url: the url to download from
    :type url: string
    :param filename: the filename to save the file as
    :type filename: string
    :param callback: passed to HTTPDownloader; per the original
        documentation it is called as parts arrive, with the signature
        func(data, current_length, total_length)
    :type callback: function
    :param headers: any optional headers to send; keys and values are
        converted with ``str``. The caller's dict is not modified.
    :type headers: dictionary
    :param force_filename: passed to HTTPDownloader (per the original
        documentation: use ``filename`` rather than a server-suggested one)
    :type force_filename: boolean
    :param allow_compression: when true, an ``accept-encoding`` header for
        deflate/gzip is added
    :type allow_compression: boolean

    :returns: the HTTPDownloader factory's Deferred
    :rtype: Deferred
    """
    url = str(url)
    filename = str(filename)
    if headers:
        # Build a new dict: rewriting the caller's dict while iterating
        # over it is unsafe and leaked the changes to the caller.
        headers = dict((str(key), str(value))
                       for key, value in headers.items())

    if allow_compression:
        if not headers:
            headers = {}
        headers["accept-encoding"] = "deflate, gzip, x-gzip"

    # In twisted 13.1.0 the _parse() function was replaced by the _URI class
    if hasattr(client, '_parse'):
        scheme, host, port, path = client._parse(url)
    else:
        # The class lives in twisted.web.client (not twisted.web) and was
        # renamed from _URI to URI in 15.0.0.
        try:
            from twisted.web.client import _URI as URI
        except ImportError:
            from twisted.web.client import URI
        uri = URI.fromBytes(url)
        scheme = uri.scheme
        host = uri.host
        port = uri.port
        # ``originFrom`` is not an attribute of the URI class; ``path`` is.
        path = uri.path

    factory = HTTPDownloader(url, filename, callback, headers,
                             force_filename, allow_compression)
    if scheme == "https":
        from twisted.internet import ssl
        reactor.connectSSL(host, port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred
def getPage(url, contextFactory=None, proxy=None, *args, **kwargs):
    """Fetch ``url`` over plain TCP, optionally via a ``host:port`` proxy.

    With a proxy, the connection goes to the proxy address and the proxy
    string is also passed to the factory as the ``proxy`` keyword.
    Returns the factory's Deferred.
    """
    _scheme, host, port, _path = client._parse(url)
    if proxy:
        proxy_host, proxy_port = proxy.split(':')
        host, port = proxy_host, int(proxy_port)
        kwargs['proxy'] = proxy

    page_factory = HTTPClientFactory(url, *args, **kwargs)
    reactor.connectTCP(host, port, page_factory)  #IGNORE:E1101
    return page_factory.deferred
def conditional_get_page(url, contextFactory=None, *args, **kwargs):
    """Fetch ``url`` with a ConditionalHTTPClientFactory.

    Extra arguments go to the factory; its Deferred is returned.
    """
    scheme, host, port, _path = client._parse(url)
    factory = ConditionalHTTPClientFactory(url, *args, **kwargs)

    if scheme != 'https':
        reactor.connectTCP(host, port, factory)
        return factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, contextFactory)
    return factory.deferred
def downloadStream(url, contextFactory=None, *args, **kwargs):
    """Start a StreamDownloader for ``url`` and return its Deferred.

    Extra arguments are forwarded to StreamDownloader.
    """
    scheme, host, port, _path = client._parse(url)
    downloader = StreamDownloader(url, *args, **kwargs)

    use_ssl = (scheme == 'https')
    if use_ssl:
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, downloader, contextFactory)
    else:
        reactor.connectTCP(host, port, downloader)
    return downloader.deferred
def downloadWithProgress(url, outputfile, contextFactory=None, *args, **kwargs):
    """Download ``url`` to ``outputfile`` with an HTTPProgressDownloader.

    :param contextFactory: SSL context factory for https URLs; a default
        one is created when omitted.
    :returns: the factory's Deferred.
    Extra arguments are forwarded to HTTPProgressDownloader.
    """
    scheme, host, port, path = client._parse(url)
    factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
    if scheme == 'https':
        from twisted.internet import ssl
        # Identity test: ``== None`` would call a custom __eq__ on the
        # supplied factory.
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def sendPartnerBoxWebCommand(url, contextFactory=None, timeout=60, username = "******", password = "", *args, **kwargs):
    """Send an HTTP request with Basic authentication over plain TCP.

    :param url: target URL.
    :param timeout: connect timeout passed to ``reactor.connectTCP``.
    :param username: user name for the Authorization header.
    :param password: password for the Authorization header.
    :returns: the factory's Deferred.

    Any ``headers`` keyword is copied and extended with the Authorization
    header; remaining arguments go to myHTTPClientFactory.
    """
    from base64 import b64encode

    scheme, host, port, path = client._parse(url)

    # b64encode never wraps its output; encodestring inserts a newline
    # every 76 characters, which breaks the header for long credentials.
    basicAuth = b64encode(("%s:%s") % (username, password))
    authHeader = "Basic " + basicAuth

    # Copy so the caller's dict is untouched; also tolerates headers=None.
    headers = dict(kwargs.get("headers") or {})
    headers["Authorization"] = authHeader
    kwargs["headers"] = headers

    factory = myHTTPClientFactory(url, *args, **kwargs)
    reactor.connectTCP(host, port, factory, timeout=timeout)
    return factory.deferred
def fetch(self, uri, req_headers=None):
    """Fetch ``uri`` over plain TCP and record the result on ``self.site``.

    On success (or a 404/410 response) ``self.site`` gets ``map``,
    ``last_check_elapsed`` and, when the cache headers parse, ``expires``,
    and ``self.done_cb(self.site)`` is called. Any other failure calls
    ``self.error_cb(self.site, msg)``.
    """
    # TODO: ims
    c = client.HTTPClientFactory(
        str(uri),
        timeout=self.fetch_timeout,
        headers=req_headers,
        # proxy=self.http_proxy  # TODO: proxy support
    )
    scheme, host, port, path = client._parse(uri)

    def callback(data):
        # data is None when called from errback for a 404/410.
        if data is None:
            self.site['map'] = None
        else:
            self.site['map'] = minidom.parseString(data)
        self.site['last_check_elapsed'] = time.time() - self.start_time
        try:
            cc_str = ", ".join(c.response_headers.get('cache-control', ''))
            max_age = int(parse_cc(cc_str).get('max-age', "0"))
            age = int(c.response_headers.get('age', ["0"])[-1])
            self.site['expires'] = time.time() + max_age - age
        except ValueError:
            logging.info("Bad CC or Age header on <%s>" % uri)
        self.done_cb(self.site)
    c.deferred.addCallback(callback)

    def errback(data):
        # Added after ``callback``, so this also sees errors raised there
        # (e.g. the XML parsing error).
        if data.type == web_error.Error:
            if data.value[0] in ["404", "410"]:
                logging.warning("%s: %s" % (data.value[0], uri))
                return callback(None)
            else:
                msg = '"%s"' % (data.value)
        elif data.type == expat.ExpatError:
            msg = '"XML parsing error (%s)"' % data.value
        elif data.type == internet_error.DNSLookupError:
            msg = '"DNS lookup error"'
        elif data.type == internet_error.TimeoutError:
            msg = '"Timeout"'
        elif data.type == internet_error.ConnectionRefusedError:
            msg = '"Connection refused"'
        elif data.type == internet_error.ConnectError:
            msg = '"Connection error"'
        else:
            # Take the traceback from the Failure itself;
            # traceback.format_exc() depends on an exception being handled
            # right now, which is not guaranteed inside an errback.
            msg = '"Unknown error (%s)"' % data.getTraceback()
        self.error_cb(self.site, msg)
    c.deferred.addErrback(errback)

    self.reactor.connectTCP(host, port, c)
def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
    """Create the progress downloader for ``url`` and connect over TCP.

    The factory is stored on ``self.factory`` and the connector on
    ``self.connection``. Extra arguments go to HTTPProgressDownloader.
    """
    if not hasattr(client, '_parse'):
        # No client._parse here: parse with the _URI class instead.
        from twisted.web.client import _URI
        parsed = _URI.fromBytes(url)
        scheme, host, port, path = (
            parsed.scheme, parsed.host, parsed.port, parsed.path)
    else:
        scheme, host, port, path = client._parse(url)

    downloader = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
    self.factory = downloader
    self.connection = reactor.connectTCP(host, port, downloader)
def discardPage(url, stall=False, *args, **kwargs):
    """Start fetching ``url`` with a stallable, discarding factory.

    ``stall`` is stored on the factory as ``do_stall``; per the original
    documentation the factory then stalls the pipe after the first 1MB,
    waits 10 seconds and resumes downloading (and discarding). Only
    ``http`` URLs are accepted. Returns the factory's Deferred.
    """
    # adapted from twisted.web.client.getPage . We can't just wrap or
    # subclass because it provides no way to override the HTTPClientFactory
    # that it creates.
    scheme, host, port, _path = tw_client._parse(url)
    discarder = StallableDiscardingHTTPClientFactory(url, *args, **kwargs)
    discarder.do_stall = stall
    assert scheme == 'http'
    reactor.connectTCP(host, port, discarder)
    return discarder.deferred
def test_externalUnicodeInterference(self):
    """
    L{client._parse} returns C{str} for scheme, host and path even after
    the same URL has been given to L{urlparse} as a C{unicode} string.
    """
    unicodeURL = u'http://example.com/path'
    byteURL = unicodeURL.encode('ascii')
    # Parse the unicode form first, then the encoded one.
    urlparse(unicodeURL)
    scheme, host, port, path = client._parse(byteURL)
    for component in (scheme, host, path):
        self.assertIsInstance(component, str)
def _make_factory(url, *args, **kwargs):
    """
    Build a C{client.HTTPClientFactory} for C{url} and start connecting it.

    All positional and keyword arguments are forwarded to the factory.  If a
    C{timeout} keyword is present it is additionally used as the connection
    timeout.  C{https} URLs are connected over SSL with a default
    C{ssl.ClientContextFactory}; everything else uses plain TCP.

    @return: The factory that was created.
    """
    scheme, host, port, _path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)

    # Forward the timeout to the reactor call only when the caller gave one.
    if "timeout" in kwargs:
        connect_options = {"timeout": kwargs["timeout"]}
    else:
        connect_options = {}

    if scheme != "https":
        reactor.connectTCP(host, port, factory, **connect_options)
        return factory

    from twisted.internet import ssl
    ssl_context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, ssl_context, **connect_options)
    return factory
def _createDownloader(self, url, file, contextFactory=None, element=None, *args, **kwargs):
    """
    Create a C{Downloader} factory for C{url} and start connecting it.

    @param url: The URL to download.
    @param file: Passed to C{Downloader} as C{outfile}.
    @param contextFactory: SSL context factory for C{https} URLs; a default
        C{ssl.ClientContextFactory} is created when omitted.
    @param element: Passed through to C{Downloader}.
    @param args: Extra positional arguments for C{Downloader}.
    @param kwargs: Extra keyword arguments for C{Downloader}.
    @return: The C{Downloader} factory.  It is returned even when setting up
        the SSL connection failed; in that case only a message is printed.
    """
    scheme, host, port, path = client._parse(url)
    factory = Downloader(url=url, outfile=file, element=element, *args, **kwargs)
    if scheme == 'https':
        try:
            if contextFactory is None:
                from twisted.internet import ssl
                contextFactory = ssl.ClientContextFactory()
            # Connect whether the context factory was supplied or defaulted;
            # previously a caller-supplied factory meant no connection at all.
            reactor.connectSSL(host, port, factory, contextFactory)
        except Exception:
            # Best effort, as before: report and still hand back the factory.
            print("failed to create ssl http downloader")
    else:
        reactor.connectTCP(host, port, factory)
    return factory
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Issue a new request.

    The URI is split into scheme, host, port and path, which are what is
    actually handed to C{self._request}.  When this agent is persistent,
    requests to the same (scheme, host, port) are run through a shared
    L{DeferredSemaphore} limited to C{self.maxConnectionsPerHostName}.

    @param method: The request method to send.
    @type method: C{str}

    @param uri: The request URI to send.
    @type uri: C{str}

    @param headers: The request headers to send.  If no I{Host} header is
        included, one is added to a copy of the headers, computed from the
        scheme, host and port of the request URI.
    @type headers: L{Headers}

    @param bodyProducer: An object which will produce the request body or,
        if the request body is to be empty, L{None}.
    @type bodyProducer: L{IBodyProducer} provider

    @return: The result of C{self._request}; documented upstream as a
        L{Deferred} firing with a L{Response}, or failing if the connection
        cannot be set up (including L{SchemeNotSupported} for an unsupported
        scheme).
    @rtype: L{Deferred}
    """
    scheme, host, port, path = _parse(uri)

    if headers is None:
        headers = Headers()
    if not headers.hasHeader('host'):
        # Work on a copy so the caller's Headers object is left untouched.
        # This is a lot of copying; it would be nice to do less.
        headers = Headers(dict(headers.getAllRawHeaders()))
        hostValue = self._computeHostValue(scheme, host, port)
        headers.addRawHeader('host', hostValue)

    requestArgs = (method, scheme, host, port, path, headers, bodyProducer)

    if not self.persistent:
        return self._request(*requestArgs)

    # One semaphore per destination, created the first time it is needed.
    destination = (scheme, host, port)
    semaphore = self._semaphores.get(destination)
    if semaphore is None:
        semaphore = DeferredSemaphore(self.maxConnectionsPerHostName)
        self._semaphores[destination] = semaphore
    return semaphore.run(self._request, *requestArgs)
def getPage(url):
    """Fetch C{url} like twisted.web.client.getPage, but always disconnect.

    The connector is disconnected once the factory's deferred fires, whether
    it succeeded or failed, and the result or failure is passed through
    unchanged.  Returns the factory's deferred.
    """
    from twisted.web import client

    scheme, host, port, _path = client._parse(url)
    page_factory = client.HTTPClientFactory(url)
    connector = reactor.connectTCP(host, port, page_factory)

    def _disconnect(result):
        # Runs on both the callback and errback paths.
        connector.disconnect()
        return result

    page_factory.deferred.addBoth(_disconnect)
    return page_factory.deferred
def getPage(url, bindAddress=None, *arg, **kw):
    """
    Variant of getPage that lets the caller choose the local bind address.

    C{bindAddress} is forwarded to the reactor's connect call; every other
    argument goes to C{HTTPClientFactory}.  C{https} URLs are connected over
    SSL with a default C{ssl.ClientContextFactory}.

    @return: The factory's deferred.
    """
    # Reimplemented here so that bindAddress can be inserted.
    scheme, host, port, _path = _parse(url)
    page_factory = HTTPClientFactory(url, *arg, **kw)

    if scheme != 'https':
        reactor.connectTCP(host, port, page_factory,
                           bindAddress=bindAddress)
        return page_factory.deferred

    from twisted.internet import ssl
    ssl_context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, page_factory, ssl_context,
                       bindAddress=bindAddress)
    return page_factory.deferred
def getPage(url, contextFactory=None, *args, **kwargs):
    """
    Fetch C{url} with an C{HTTPClientFactory} and return its deferred.

    @param url: The URL to fetch; it is converted with C{str()} first.
    @param contextFactory: SSL context factory for C{https} URLs; a default
        C{ssl.ClientContextFactory} is created when omitted.
    @param args: Extra positional arguments for C{HTTPClientFactory}.
    @param kwargs: Extra keyword arguments for C{HTTPClientFactory}.
    @return: The factory's deferred.
    """
    url = str(url)
    scheme, host, port, path = _parse(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    # NOTE: an unreachable "if False:" proxy experiment used to live here.
    # It pointed host/port at the proxy and set factory.path to the full URL.
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def twoperation(self, *IClist, **Options):
    '''
    OPC Operation.

    Builds the SOAP request for the operation named by the outer variable
    ``op``, POSTs it to ``self.OPCServerAddress`` and returns the factory's
    deferred.  The deferred is chained to ``self.twhandle<op>`` on success
    and to ``handleFault`` on failure.

    IClist and Options are handed to ``self.fill_tc``; any option still left
    in Options afterwards is treated as unknown and raises TypeError.
    '''
    x = getattr(OpcSrv, op + 'SoapIn')()

    # Apply General attributes (Options)
    self.fill_tc(x, IClist, Options)

    # All Options should be gone, if not raise error
    # (presumably fill_tc removes the options it consumes -- verify).
    if Options:
        raise TypeError('Unknown options given: %s' % str(Options))

    # Serialize typecode
    SOAPMessage = str(ZSI.SoapWriter().serialize(x, unique=True))

    headers = {
        'SOAPAction': 'http://opcfoundation.org/webservices/XMLDA/1.0/' + op,
        'content-type': 'text/xml; charset=utf-8',
        'content-length': str(len(SOAPMessage)),
    }

    # If '/' is not the end of the server address, the operation
    # fails. This should better be handled by the server
    if self.OPCServerAddress[-1] != '/':
        self.OPCServerAddress += '/'

    scheme, host, port, path = twclient._parse(self.OPCServerAddress)

    factory = twclient.HTTPClientFactory(self.OPCServerAddress,
                                         method='POST',
                                         postdata=SOAPMessage,
                                         headers=headers,
                                         agent='Twisted OPC XMLDA Client',
                                         timeout=0)
    if scheme == 'https':
        from twisted.internet import ssl
        # There is no way to pass a context factory in, so always build the
        # default.  Testing an unassigned local "contextFactory" for None
        # raised UnboundLocalError on every https call.
        contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)

    # Add handle___Response to the callback chain
    n = getattr(self, 'twhandle' + op)
    factory.deferred.addCallback(n)
    factory.deferred.addErrback(handleFault)
    return factory.deferred
def _makeGetterFactory(url, factoryFactory, contextFactory=None, connectionTimeout=30, *args, **kwargs):
    """
    Create a client factory for C{url} and connect it with a timeout.

    This is lifted from the function of the same name in Twisted; the only
    addition is support for a connection timeout.

    @param url: The URL to connect to.
    @param factoryFactory: Callable invoked as
        C{factoryFactory(url, *args, **kwargs)} to build the factory.
    @param contextFactory: SSL context factory for C{https} URLs; a default
        C{ssl.ClientContextFactory} is created when omitted.
    @param connectionTimeout: Passed as C{timeout} to the reactor's connect
        call.
    @return: The factory that was created.
    """
    scheme, host, port, _path = client._parse(url)
    getter = factoryFactory(url, *args, **kwargs)

    if scheme != 'https':
        reactor.connectTCP(host, port, getter, timeout=connectionTimeout)
        return getter

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, getter, contextFactory,
                       timeout=connectionTimeout)
    return getter