def main():
    """Fetch one or two URLs through local SOCKS proxies, then run the reactor.

    ``sys.argv[1]`` (mandatory) is requested through the SOCKS proxy on
    localhost:9050.  ``sys.argv[2]`` (optional) is requested through the
    SOCKS proxy on localhost:1080.  ``thing.npages`` is incremented once for
    every request that is started.
    """
    thing = mything()

    # Mandatory first argument is a URL to fetch over Tor (or whatever
    # SOCKS proxy that is running on localhost:9050).
    url = sys.argv[1]
    proxy = (None, 'localhost', 9050, True, None, None)
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(thing.clientcb)
    sw = thing.sockswrapper(proxy, url)
    d = sw.connect(f)
    d.addCallback(thing.wrappercb)
    thing.npages += 1

    # Optional second argument is a URL to fetch over whatever SOCKS
    # proxy that is running on localhost:1080 (possibly `twistd -n socks').
    if len(sys.argv) > 2:
        url2 = sys.argv[2]
        proxy2 = (None, 'localhost', 1080, True, None, None)
        # The factory must request url2, the same URL the wrapper below
        # is built for, not the first URL.
        f2 = client.HTTPClientFactory(url2)
        f2.deferred.addCallback(thing.clientcb)
        sw2 = thing.sockswrapper(proxy2, url2)
        d2 = sw2.connect(f2)
        d2.addCallback(thing.wrappercb)
        thing.npages += 1

    reactor.run()
def ooni_main(self, cmd):
    """Request ``cmd.hostname`` as a control and as an experiment fetch.

    The control request goes through the SOCKS proxy named by
    ``cmd.controlproxy`` when that is set, otherwise directly through the
    endpoint returned by ``self.endpoint``.  The experiment request always
    connects directly.  Each result is passed to ``self.cb`` tagged with
    ``'control'`` or ``'experiment'``.  The reactor is then run.

    Raises:
        Exception: if ``self.endpoint`` returns a false value for the scheme.
        AssertionError: if a control proxy is requested for an https URL.
    """
    # We don't have the Command object so cheating for now.
    url = cmd.hostname

    # FIXME: validate that url is on the form scheme://host[:port]/path
    scheme, host, port, path = client._parse(url)

    ctrl_dest = self.endpoint(scheme, host, port)
    if not ctrl_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    if cmd.controlproxy:
        # Raised explicitly (rather than via ``assert``) so the check is
        # not stripped when Python runs with -O.
        if scheme == 'https':
            raise AssertionError("no support for proxied https atm, sorry")
        _, proxy_host, proxy_port, _ = client._parse(cmd.controlproxy)
        control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest)
        # Single pre-formatted string: prints the same text on Python 2
        # and Python 3.
        print("proxy:  %s %s" % (proxy_host, proxy_port))
    else:
        control = ctrl_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('control', x))
    control.connect(f)

    exp_dest = self.endpoint(scheme, host, port)
    if not exp_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    # FIXME: use the experiment proxy if there is one
    experiment = exp_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('experiment', x))
    experiment.connect(f)

    reactor.run()
def ooni_main(self):
    """Request a hard-coded URL as a control and as an experiment fetch.

    The URL, ``self.controlproxy`` and ``self.experimentalproxy`` are set
    to fixed values here.  The control request goes through the SOCKS proxy
    in ``self.controlproxy``; the experiment request connects directly.
    Each result is passed to ``self.cb`` tagged with ``'control'`` or
    ``'experiment'``.  The reactor is then run.

    Returns:
        None.  Returns early, without issuing any request, when the URL
        does not look like ``scheme://host``.

    Raises:
        Exception: if ``self.endpoint`` returns a false value for the scheme.
    """
    # We don't have the Command object so cheating for now.
    url = 'http://check.torproject.org/'
    self.controlproxy = 'socks4a://127.0.0.1:9050'
    self.experimentalproxy = ''

    # Raw string: "\:" and "\/" are invalid escapes in a normal literal.
    # The pattern text itself is unchanged.
    if not re.match(r"[a-zA-Z0-9]+\:\/\/[a-zA-Z0-9]+", url):
        return None

    scheme, host, port, path = client._parse(url)

    ctrl_dest = self.endpoint(scheme, host, port)
    if not ctrl_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    if self.controlproxy:
        _, proxy_host, proxy_port, _ = client._parse(self.controlproxy)
        control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest)
    else:
        control = ctrl_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('control', x))
    control.connect(f)

    exp_dest = self.endpoint(scheme, host, port)
    if not exp_dest:
        raise Exception('unsupported scheme %s in %s' % (scheme, url))
    # FIXME: use the experiment proxy if there is one
    experiment = exp_dest
    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(lambda x: self.cb('experiment', x))
    experiment.connect(f)

    reactor.run()
def getPagePrxoy(url, proxy=None, contextFactory=None, *args, **kwargs):
    """Start fetching *url*, optionally by connecting to a proxy.

    *proxy*, when given, is a mapping with ``"host"`` and ``"port"`` keys,
    for example ``{"host": "192.168.1.111", "port": 6666}``.  In that case
    the TCP connection is made to the proxy.  Otherwise the connection goes
    to the host and port parsed from *url*, over SSL when the scheme is
    ``b'https'``.

    The ``timeout`` keyword is always forced to 60.  Remaining positional
    and keyword arguments go to ``client.HTTPClientFactory``.

    Returns the factory's deferred.
    """
    kwargs["timeout"] = 60

    # Proxy case first: connect to the proxy and hand back the deferred.
    if proxy is not None:
        proxied = client.HTTPClientFactory(url, *args, **kwargs)
        reactor.connectTCP(proxy["host"], proxy["port"], proxied)
        return proxied.deferred

    scheme, host, port, path = client._parse(url)
    direct = client.HTTPClientFactory(url, *args, **kwargs)

    if scheme != b'https':
        reactor.connectTCP(client.nativeString(host), port, direct)
        return direct.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(client.nativeString(host), port, direct,
                       contextFactory)
    return direct.deferred
def get_page_with_header(url, contextFactory=None, *args, **kwargs):
    """Download a web page together with its response headers.

    Returns a deferred that calls back with a ``(page,
    response_headers)`` tuple, or errbacks with a description of the
    error.

    A ``str`` *url* is encoded to UTF-8 bytes before use.  See
    HTTPClientFactory for the extra args that can be passed.
    """
    if isinstance(url, str):
        url = url.encode("utf-8")
    purl = urllib.parse.urlparse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)

    # urlparse() on bytes returns bytes components, so the host has to be
    # turned back into text for the reactor and the scheme compared
    # against a bytes literal.
    host = purl.hostname
    if isinstance(host, bytes):
        host = host.decode("utf-8")

    if purl.scheme == b'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, purl.port or 443, factory, contextFactory)
    else:
        reactor.connectTCP(host, purl.port or 80, factory)

    def cb(page):
        # Pair the body with the headers recorded on the factory.
        return (page, factory.response_headers)

    factory.deferred.addCallback(cb)
    return factory.deferred
def getProxyPage(url, contextFactory=None, host=None, port=None,
                 status=None, *args, **kwargs):
    """Download a web page as a string. (modified from
    twisted.web.client.getPage)

    Download a page. Return a deferred, which will callback with a
    page (as a string) or errback with a description of the error.

    *host* and *port* name the peer to connect to.  *host* falls back to
    the factory's host, and the factory's port takes precedence over
    *port*.  When *status* is 301, 302 or 303 a ``RedirHTTPClientFactory``
    is used instead of the plain ``HTTPClientFactory``.

    See HTTPClientFactory to see what extra args can be passed.
    """
    # status defaults to None, which cannot be ordered against an int on
    # Python 3; only compare when a status was actually supplied.
    if status is not None and 300 < status < 304:
        factory = RedirHTTPClientFactory(url, *args, **kwargs)
    else:
        factory = client.HTTPClientFactory(url, *args, **kwargs)

    host = host or factory.host
    # We should connect to different port taken from URI if specified
    port = factory.port or port

    if factory.scheme == 'https':
        if contextFactory is None:
            contextFactory = SSLClientContextFactory(factory.host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def get_page(self, contextFactory=None, description=None, *args, **kwargs):
    """Start fetching ``self.url`` and return the factory's deferred.

    The connection goes to ``self.proxy_host``/``self.proxy_port`` when
    ``self.use_proxy`` is true, otherwise to ``self.host``/``self.port``.
    SSL is used when the parsed scheme is ``'https'``.  When
    ``self.return_headers`` is true the deferred fires with a ``(page,
    response_headers)`` tuple instead of the page alone.

    Extra positional and keyword arguments go to ``HTTPClientFactory``.
    """
    if description is None:
        description = self.url
    scheme, _, _, _ = self.url_parse(self.url)
    factory = txwebclient.HTTPClientFactory(self.url, *args, **kwargs)

    # Choose the transport once; the peer is chosen separately below.
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()

        def connect(peer_host, peer_port):
            reactor.connectSSL(peer_host, peer_port, factory,
                               contextFactory)
    else:
        def connect(peer_host, peer_port):
            reactor.connectTCP(peer_host, peer_port, factory)

    if self.use_proxy:
        connect(self.proxy_host, self.proxy_port)
    else:
        connect(self.host, self.port)

    if not self.return_headers:
        return factory.deferred
    return factory.deferred.addCallback(
        lambda page: (page, factory.response_headers))
def testCookieHeaderParsing(self):
    """
    Feeding a response with three Set-Cookie headers to the page getter
    leaves the expected request on the transport and stores each cookie
    name and value in the factory's C{cookies} dict.
    """
    factory = client.HTTPClientFactory('http://foo.example.com/')
    proto = factory.buildProtocol('127.42.42.42')
    proto.transport = FakeTransport()
    proto.connectionMade()
    for line in [
        '200 Ok',
        'Squash: yes',
        'Hands: stolen',
        'Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT',
        'Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/',
        'Set-Cookie: SHIPPING=FEDEX; path=/foo',
        '',
        'body',
        'more body',
    ]:
        proto.dataReceived(line + '\r\n')
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(proto.transport.data, [
        'GET / HTTP/1.0\r\n',
        'Host: foo.example.com\r\n',
        'User-Agent: Twisted PageGetter\r\n',
        '\r\n',
    ])
    self.assertEqual(factory.cookies, {
        'CUSTOMER': 'WILE_E_COYOTE',
        'PART_NUMBER': 'ROCKET_LAUNCHER_0001',
        'SHIPPING': 'FEDEX',
    })
def testFactoryInfo(self):
    """
    Fetch the C{'file'} URL over SSL and hand the finished factory to
    C{_cbFactoryInfo} for checking.
    """
    target = self.getURL('file')
    _scheme, host, port, _path = client._parse(target)
    page_factory = client.HTTPClientFactory(target)
    tls_context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, page_factory, tls_context)
    # The base class defines _cbFactoryInfo correctly for this
    finished = page_factory.deferred
    return finished.addCallback(self._cbFactoryInfo, page_factory)
def test_earlyHeaders(self):
    """
    When the connection is made, L{HTTPPageGetter} writes request headers
    built from its factory.  With the factory configured below, the bytes
    written must contain: the I{Host} value taken from the C{headers}
    dict, a I{User-Agent} equal to the C{agent} argument, a
    I{Content-Length} of 9 (the length of the post data, not the value in
    the dict), the pass-through I{Useful} header, C{connection: close},
    and a I{Cookie} header joining the dict's value with the entry from
    C{cookies}.
    """
    request_headers = {
        b'Host': b'example.net',
        b'User-Agent': b'fooble',
        b'Cookie': b'blah blah',
        b'Content-Length': b'12981',
        b'Useful': b'value',
    }
    factory = client.HTTPClientFactory(
        b'http://foo/bar',
        agent=b"foobar",
        cookies={b'baz': b'quux'},
        postdata=b"some data",
        headers=request_headers)

    wire = StringTransport()
    getter = client.HTTPPageGetter()
    getter.factory = factory
    getter.makeConnection(wire)
    written = wire.value()

    expected_lines = (
        b"Host: example.net\r\n",
        b"User-Agent: foobar\r\n",
        b"Content-Length: 9\r\n",
        b"Useful: value\r\n",
        b"connection: close\r\n",
        b"Cookie: blah blah; baz=quux\r\n",
    )
    for header_line in expected_lines:
        self.assertIn(header_line, written)
def getProxyPage(url, contextFactory=None, host=None, port=None,
                 *args, **kwargs):
    """Fetch *url*, connecting to an explicitly chosen peer if one is given.

    (modified from twisted.web.client.getPage)

    *host* and *port* select where the connection is made.  Each falls
    back to the value the factory derived from *url* when it is not
    supplied.  SSL is used when the factory's scheme is ``'https'``.

    Returns a deferred which will callback with the page (as a string) or
    errback with a description of the error.  See HTTPClientFactory for
    the extra args that can be passed.
    """
    page_factory = client.HTTPClientFactory(url, *args, **kwargs)
    peer_host = host if host else page_factory.host
    peer_port = port if port else page_factory.port

    if page_factory.scheme != 'https':
        reactor.connectTCP(peer_host, peer_port, page_factory)
        return page_factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(peer_host, peer_port, page_factory, contextFactory)
    return page_factory.deferred
def testCookieHeaderParsing(self):
    """
    Feeding a response with three Set-Cookie headers to the page getter
    leaves the expected request bytes on the transport and stores each
    cookie name and value in the factory's C{cookies} dict.
    """
    factory = client.HTTPClientFactory(b'http://foo.example.com/')
    getter = factory.buildProtocol('127.42.42.42')
    wire = StringTransport()
    getter.makeConnection(wire)

    response_lines = (
        b'200 Ok',
        b'Squash: yes',
        b'Hands: stolen',
        b'Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT',
        b'Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/',
        b'Set-Cookie: SHIPPING=FEDEX; path=/foo',
        b'',
        b'body',
        b'more body',
    )
    # Deliver the response one line at a time.
    for raw_line in response_lines:
        getter.dataReceived(raw_line + b'\r\n')

    expected_request = b''.join([
        b'GET / HTTP/1.0\r\n',
        b'Host: foo.example.com\r\n',
        b'User-Agent: Twisted PageGetter\r\n',
        b'\r\n',
    ])
    self.assertEqual(wire.value(), expected_request)

    expected_cookies = {
        b'CUSTOMER': b'WILE_E_COYOTE',
        b'PART_NUMBER': b'ROCKET_LAUNCHER_0001',
        b'SHIPPING': b'FEDEX',
    }
    self.assertEqual(factory.cookies, expected_cookies)
def getPage(url, contextFactory=None, *args, **kwargs):
    """Download a web page as a string.

    Download a page. Return a HTTPClientFactory

    The host and port are split out of *url* by hand.  When the port is
    missing or unparsable the scheme's default is used: 443 for https, 80
    otherwise.  A URL with no path component is accepted.

    See HTTPClientFactory to see what extra args can be passed.

    Raises:
        ValueError: if *url* contains no ``://`` separator.
    """
    scheme, remainder = url.split('://', 1)
    # Only the authority part is needed here; a URL without any "/" after
    # the host is still valid.
    host_port = remainder.split('/', 1)[0]
    default_port = 443 if scheme == 'https' else 80
    try:
        host, port = host_port.split(':')
        port = int(port)
    except ValueError:
        # No port, more than one colon, or a non-numeric port.
        host, port = host_port, default_port

    factory = client.HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = False
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory
def _getPage(self):
    """Start a no-cache request via ``self.proxyHost``/``self.proxyPort``.

    A failure is routed to ``self.noPage``.  The result is then passed on
    to ``self.page``.  Nothing is returned.
    """
    # NOTE(review): HTTPClientFactory's first positional parameter is
    # normally the URL and the second the method; here they receive
    # proxyHost and url respectively -- confirm this is intended.
    page_factory = client.HTTPClientFactory(self.proxyHost, self.url)
    page_factory.headers = {'pragma': 'no-cache'}
    reactor.connectTCP(self.proxyHost, self.proxyPort, page_factory)
    # The errback is registered before the callback, so whatever noPage
    # returns is fed to page.
    page_factory.deferred.addErrback(self.noPage).addCallback(self.page)
def request(self, method, uri, headers=None, bodyProducer=None):
    """Issue *method* on *uri* through an ``HTTPClientFactory``.

    *headers*, when given, is flattened to a plain dict holding the first
    raw value of each header.  *bodyProducer* is accepted but not used.
    The returned deferred fires with a ``_HTTP10Agent._FakeResponse``
    built from the factory's status, on success and on failure alike.
    """
    parsed = urlparse.urlparse(uri, scheme='http')
    over_tls = parsed.scheme == 'https'
    host = parsed.hostname
    port = parsed.port
    if port is None:
        port = 443 if over_tls else 80

    # Translate from Agent's Headers object back into a dict.
    if headers is not None:
        headers = {
            name: values[0]
            for name, values in headers.getAllRawHeaders()
        }

    f = client.HTTPClientFactory(uri, method=method, headers=headers,
                                 timeout=2)

    def gotResponse(page):
        return _HTTP10Agent._FakeResponse(int(f.status))
    f.deferred.addBoth(gotResponse)

    if over_tls:
        self._reactor.connectSSL(host, port, f, ClientContextFactory())
    else:
        self._reactor.connectTCP(host, port, f)

    return f.deferred
def testFactoryInfo(self):
    """
    Fetch the C{'file'} URL over SSL, using the host and port parsed by
    L{client.URI}, and hand the finished factory to C{_cbFactoryInfo}.
    """
    target = self.getURL('file')
    parsed = client.URI.fromBytes(target)
    page_factory = client.HTTPClientFactory(target)
    peer_host = nativeString(parsed.host)
    peer_port = parsed.port
    reactor.connectSSL(peer_host, peer_port, page_factory,
                       ssl.ClientContextFactory())
    # The base class defines _cbFactoryInfo correctly for this
    finished = page_factory.deferred
    return finished.addCallback(self._cbFactoryInfo, page_factory)
def test_setURLRelativePath(self):
    """
    Passing a relative URL to L{client.HTTPClientFactory.setURL} replaces
    the url and path, leaving scheme, host and port as they were.
    """
    relative = b'/hello'
    factory = client.HTTPClientFactory(b'http://example.com')
    factory.setURL(relative)

    expected = (relative, b'http', b'example.com', 80, b'/hello')
    actual = (factory.url, factory.scheme, factory.host, factory.port,
              factory.path)
    self.assertEqual(expected, actual)
def test_setURL(self):
    """
    Building a L{client.HTTPClientFactory} from an absolute URL fills in
    the url, scheme and host, with port 80 and path C{/} for a URL that
    names neither.
    """
    absolute = b'http://example.com'
    factory = client.HTTPClientFactory(absolute)

    expected = (absolute, b'http', b'example.com', 80, b'/')
    actual = (factory.url, factory.scheme, factory.host, factory.port,
              factory.path)
    self.assertEqual(expected, actual)
def test_HTTPPort80(self):
    """
    An explicit C{:80} in an HTTP URL must not appear in the host header,
    since 80 is the default HTTP port.
    """
    factory = client.HTTPClientFactory(b'http://foo.example.com:80/')
    getter = factory.buildProtocol('127.42.42.42')
    getter.makeConnection(StringTransport())

    sent_request = getter.transport.value()
    host_header = self._getHost(sent_request)
    self.assertEqual(host_header, b'foo.example.com')
def request_url(url):
    """Start fetching *url* and return the factory's deferred.

    *url* is parsed with ``client.URI.fromBytes``.  The connection uses
    SSL when the scheme is https and plain TCP otherwise.  The factory is
    created with ``REQUEST_DEFAULTS`` as keyword arguments.
    """
    u = client.URI.fromBytes(url)
    factory = client.HTTPClientFactory(url, **REQUEST_DEFAULTS)

    # URI.fromBytes produces bytes components.  Hand the reactor a native
    # string host; on Python 2 bytes is str, so the value is left alone.
    host = u.host if isinstance(u.host, str) else u.host.decode('ascii')

    # Compare against a bytes literal: on Python 3 a bytes scheme never
    # equals the text 'https'.
    if u.scheme == b'https':
        reactor.connectSSL(host, u.port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(host, u.port, factory)
    return factory.deferred
def test_HTTPSDefaultPort(self):
    """
    No port should be included in the host header when connecting to the
    default HTTPS port.
    """
    factory = client.HTTPClientFactory('https://foo.example.com/')
    proto = factory.buildProtocol('127.42.42.42')
    proto.makeConnection(StringTransport())
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(self._getHost(proto.transport.value()),
                     'foo.example.com')
def test_setURLRemovesFragment(self):
    """
    L{client.HTTPClientFactory.setURL} drops the fragment identifier from
    the stored path while keeping the parameters and query.
    """
    with_fragment = b'https://foo.com:8443/bar;123?a#frag'
    factory = client.HTTPClientFactory(b'http://example.com')
    factory.setURL(with_fragment)

    expected = (with_fragment, b'https', b'foo.com', 8443, b'/bar;123?a')
    actual = (factory.url, factory.scheme, factory.host, factory.port,
              factory.path)
    self.assertEqual(expected, actual)
def test_HTTPSNotPort443(self):
    """
    The port should be included in the host header when connecting to a
    non-default HTTPS port.
    """
    # An https URL is required here; with an http URL this test would
    # only repeat the non-default HTTP port case.
    factory = client.HTTPClientFactory(b'https://foo.example.com:8080/')
    proto = factory.buildProtocol('127.42.42.42')
    proto.makeConnection(StringTransport())
    self.assertEqual(self._getHost(proto.transport.value()),
                     b'foo.example.com:8080')
def test_setURLRelativeScheme(self):
    """
    Exercise L{client.HTTPClientFactory.setURL} with a scheme-relative
    URL and check the resulting url, scheme, host, port and path.
    """
    # NOTE(review): the URL names port 81 but the expected port below is
    # 80 -- confirm against setURL's handling of scheme-relative URLs.
    scheme_relative = b'//foo.com:81/bar'
    factory = client.HTTPClientFactory(b'http://example.com')
    factory.setURL(scheme_relative)

    expected = (scheme_relative, b'http', b'foo.com', 80, b'/bar')
    actual = (factory.url, factory.scheme, factory.host, factory.port,
              factory.path)
    self.assertEqual(expected, actual)
def fetch(self, uri, req_headers=None):
    """Fetch *uri* over TCP and report the outcome for ``self.site``.

    On success the body is parsed with ``minidom`` into
    ``self.site['map']``, the elapsed time and expiry are recorded, and
    ``self.done_cb(self.site)`` is called.  A 404 or 410 response is
    treated as success with ``self.site['map']`` set to None.  Any other
    failure, including one raised while handling the body, is turned into
    a quoted message and passed to ``self.error_cb(self.site, msg)``.

    *req_headers* is passed to the factory as the request headers.
    """
    # TODO: ims
    c = client.HTTPClientFactory(
        str(uri),
        timeout=self.fetch_timeout,
        headers=req_headers,
        # proxy=self.http_proxy  # TODO: proxy support
    )
    scheme, host, port, path = client._parse(uri)

    def callback(data):
        if data is None:
            self.site['map'] = None
        else:
            self.site['map'] = minidom.parseString(data)
        self.site['last_check_elapsed'] = time.time() - self.start_time
        try:
            cc_str = ", ".join(c.response_headers.get('cache-control', ''))
            max_age = int(parse_cc(cc_str).get('max-age', "0"))
            age = int(c.response_headers.get('age', ["0"])[-1])
            self.site['expires'] = time.time() + max_age - age
        except ValueError:
            logging.info("Bad CC or Age header on <%s>", uri)
        self.done_cb(self.site)
    c.deferred.addCallback(callback)

    def errback(data):
        if data.type == web_error.Error:
            if data.value[0] in ["404", "410"]:
                logging.warning("%s: %s", data.value[0], uri)
                return callback(None)
            else:
                msg = '"%s"' % (data.value)
        elif data.type == expat.ExpatError:
            msg = '"XML parsing error (%s)"' % data.value
        elif data.type == internet_error.DNSLookupError:
            msg = '"DNS lookup error"'
        elif data.type == internet_error.TimeoutError:
            msg = '"Timeout"'
        elif data.type == internet_error.ConnectionRefusedError:
            msg = '"Connection refused"'
        elif data.type == internet_error.ConnectError:
            msg = '"Connection error"'
        else:
            # No exception is being handled when an errback runs, so take
            # the traceback from the failure object itself.
            msg = '"Unknown error (%s)"' % data.getTraceback()
        self.error_cb(self.site, msg)
    # Registered after the callback so that errors raised inside it
    # (e.g. XML parse errors) are reported here as well.
    c.deferred.addErrback(errback)
    self.reactor.connectTCP(host, port, c)
def makeRequest(address_spec, data, callback, errback):
    """POST *data* as JSON to the ``(host, port)`` pair in *address_spec*.

    *callback* receives the response.  On failure *errback* is called
    with *address_spec* followed by the error.
    """
    def report_failure(error):
        # Change the signature of the errback
        errback(address_spec, error)

    host, port = address_spec
    factory = client.HTTPClientFactory(
        '/',
        agent='PasswordChunker',
        method='POST',
        postdata=json.dumps(data),
    )
    factory.deferred.addCallback(callback).addErrback(report_failure)
    reactor.connectTCP(host, port, factory)
def _make_factory(url, *args, **kwargs):
    """Create an ``HTTPClientFactory`` for *url*, connect it, and return it.

    The connection uses SSL for the ``"https"`` scheme and plain TCP
    otherwise.  A ``timeout`` keyword, when present, is passed to the
    reactor's connect call as well as to the factory.
    """
    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)

    # Only the timeout is forwarded to the connect call.
    connect_kwargs = {
        key: kwargs[key] for key in ("timeout",) if key in kwargs
    }

    if scheme != "https":
        reactor.connectTCP(host, port, factory, **connect_kwargs)
        return factory

    from twisted.internet import ssl
    tls_context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, tls_context, **connect_kwargs)
    return factory
def getPage(url):
    """Fetch *url* over TCP and always disconnect afterwards.

    This is a variant of the standard twisted.web.client.getPage that
    disconnects its connector once the request finishes, whether it
    succeeded or failed.  Returns the factory's deferred, whose result is
    passed through unchanged.
    """
    from twisted.web import client

    scheme, host, port, path = client._parse(url)
    page_factory = client.HTTPClientFactory(url)
    connector = reactor.connectTCP(host, port, page_factory)

    def _disconnect(result):
        connector.disconnect()
        return result

    finished = page_factory.deferred
    finished.addBoth(_disconnect)
    return finished
def twoperation(self, *IClist, **Options):
    """Send one OPC XML-DA SOAP operation and return its deferred.

    The request typecode for ``op`` is filled from *IClist* and *Options*
    by ``self.fill_tc``, serialized, and POSTed to
    ``self.OPCServerAddress``.  The response is handled by the
    ``twhandle<op>`` method and failures by ``handleFault``.

    Raises:
        TypeError: if any entry is left in *Options* after ``fill_tc``.
    """
    x = getattr(OpcSrv, op + 'SoapIn')()

    # Apply General attributes (Options)
    self.fill_tc(x, IClist, Options)

    # All Options should be gone, if not raise error
    if Options:
        raise TypeError('Unknown options given: %s' % str(Options))

    # Serialize typecode
    SOAPMessage = str(ZSI.SoapWriter().serialize(x, unique=True))

    headers = {
        'SOAPAction':
            'http://opcfoundation.org/webservices/XMLDA/1.0/' + op,
        'content-type': 'text/xml; charset=utf-8',
        'content-length': str(len(SOAPMessage)),
    }

    # If '/' is not the end of the server address, the operation
    # fails. This should better be handled by the server
    if not self.OPCServerAddress.endswith('/'):
        self.OPCServerAddress += '/'

    scheme, host, port, path = twclient._parse(self.OPCServerAddress)
    factory = twclient.HTTPClientFactory(self.OPCServerAddress,
                                         method='POST',
                                         postdata=SOAPMessage,
                                         headers=headers,
                                         agent='Twisted OPC XMLDA Client',
                                         timeout=0)
    if scheme == 'https':
        from twisted.internet import ssl
        # A fresh default context is used.  Reading a ``contextFactory``
        # name that is also assigned in this function would raise
        # UnboundLocalError.
        reactor.connectSSL(host, port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(host, port, factory)

    # Add handle___Response to the callback chain
    n = getattr(self, 'twhandle' + op)
    factory.deferred.addCallback(n)
    factory.deferred.addErrback(handleFault)
    return factory.deferred
def test_duplicateHeaderCGI(self):
    """
    When a CGI script emits the same header twice, the response carries
    both values.
    """
    script_path = self.writeCGI(DUAL_HEADER_CGI)
    port_number = self.startServer(script_path)
    cgi_url = "http://localhost:%d/cgi" % (port_number,)

    page_factory = client.HTTPClientFactory(cgi_url)
    reactor.connectTCP('localhost', port_number, page_factory)

    def verify_headers(_ignored):
        received = page_factory.response_headers['header']
        self.assertEqual(received, ['spam', 'eggs'])

    finished = page_factory.deferred
    finished.addCallback(verify_headers)
    return finished