def __init__(self, scraper, pool=None):
    self.scraper = scraper
    self._pool = pool
    self._agents = {}  # maps proxy -> agent

    redirectLimit = scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3

    # create an agent for direct requests
    self._direct_agent = Agent(reactor, pool=self._pool,
                               connectTimeout=scraper.config.get('timeout') or 30)
    if redirectLimit > 0:
        self._direct_agent = BrowserLikeRedirectAgent(self._direct_agent,
                                                      redirectLimit=redirectLimit)
    self._direct_agent = ContentDecoderAgent(self._direct_agent,
                                             [('gzip', GzipDecoder)])

    self.cj = self.scraper.client.opener.cj
    if self.cj is not None:
        self._direct_agent = CookieAgent(self._direct_agent, self.cj)

    # create an agent for http-proxy requests
    # no endpoint yet; the double-underscore name keeps a reference to the bare agent
    self.__http_proxy_agent = ProxyAgent(None, pool=self._pool)
    if redirectLimit > 0:
        self._http_proxy_agent = BrowserLikeRedirectAgent(self.__http_proxy_agent,
                                                          redirectLimit=redirectLimit)
        self._http_proxy_agent = ContentDecoderAgent(self._http_proxy_agent,
                                                     [('gzip', GzipDecoder)])
    else:
        self._http_proxy_agent = ContentDecoderAgent(self.__http_proxy_agent,
                                                     [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._http_proxy_agent = CookieAgent(self._http_proxy_agent, self.cj)

    # create an agent for https-proxy requests
    # no proxy endpoint yet; again, the double-underscore name keeps the bare agent
    self.__https_proxy_agent = TunnelingAgent(
        reactor=reactor, proxy=None,
        contextFactory=ScrapexClientContextFactory(),
        connectTimeout=30, pool=self._pool)
    if redirectLimit > 0:
        self._https_proxy_agent = BrowserLikeRedirectAgent(self.__https_proxy_agent,
                                                           redirectLimit=redirectLimit)
        self._https_proxy_agent = ContentDecoderAgent(self._https_proxy_agent,
                                                      [('gzip', GzipDecoder)])
    else:
        self._https_proxy_agent = ContentDecoderAgent(self.__https_proxy_agent,
                                                      [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._https_proxy_agent = CookieAgent(self._https_proxy_agent, self.cj)
def _setUp(self):
    super(HTTPTest, self)._setUp()

    try:
        import OpenSSL
    except ImportError:
        log.err("Warning! pyOpenSSL is not installed. https websites will "
                "not work")

    self.control_agent = TrueHeadersSOCKS5Agent(
        reactor,
        proxyEndpoint=TCP4ClientEndpoint(reactor, '127.0.0.1',
                                         config.tor.socks_port))

    self.report['socksproxy'] = None
    if self.localOptions['socksproxy']:
        try:
            sockshost, socksport = self.localOptions['socksproxy'].split(':')
            self.report['socksproxy'] = self.localOptions['socksproxy']
        except ValueError:
            raise InvalidSocksProxyOption
        socksport = int(socksport)
        self.agent = TrueHeadersSOCKS5Agent(
            reactor,
            proxyEndpoint=TCP4ClientEndpoint(reactor, sockshost, socksport))
    else:
        self.agent = TrueHeadersAgent(reactor)

    self.report['agent'] = 'agent'
    if self.followRedirects:
        try:
            self.control_agent = FixedRedirectAgent(self.control_agent)
            self.agent = FixedRedirectAgent(
                self.agent,
                ignorePrivateRedirects=self.ignorePrivateRedirects)
            self.report['agent'] = 'redirect'
        except Exception:
            log.err("Warning! You are running an old version of twisted "
                    "(<= 10.1). I will not be able to follow redirects. "
                    "This may make the testing less precise.")

    if len(self.contentDecoders) > 0:
        self.control_agent = ContentDecoderAgent(self.control_agent,
                                                 self.contentDecoders)
        self.agent = ContentDecoderAgent(self.agent, self.contentDecoders)

    self.processInputs()
    log.debug("Finished test setup")
def run(self) -> Type[Tuple]:
    agent = Agent(reactor, connectTimeout=self._timeout)

    # Add the gzip decoder
    if self._compressed:
        agent = ContentDecoderAgent(agent, [(b"gzip", GzipDecoder)])

    binaryPayloadRequestProducer = _BinaryPayloadRequestProducer(
        self._payload, self._meta, self._isPayloadGzipped,
    )

    # Make the web request
    response = yield agent.request(
        self._httpMethod, self._url, Headers(self._headers),
        binaryPayloadRequestProducer,
    )

    self._meta = binaryPayloadRequestProducer.meta
    self._meta.code = response.code
    self._meta.version = response.version
    self._meta.headers = {k.decode(): v[0].decode()
                          for k, v in response.headers.getAllRawHeaders()}

    # Get the response data
    responseProducer = self._cbResponse(response, self._meta)
    self._meta = responseProducer.meta

    return self._meta
def _request(self, request, callback):
    global pnconn_pool

    ## Build URL
    '''
    url = self.origin + '/' + "/".join([
        "".join([
            ' ~`!@#$%^&*()+=[]\\{}|;\':",./<>?'.find(ch) > -1 and
            hex(ord(ch)).replace('0x', '%').upper() or ch
            for ch in list(bit)
        ]) for bit in request])
    '''
    url = self.getUrl(request)

    agent = ContentDecoderAgent(
        RedirectAgent(
            Agent(reactor,
                  contextFactory=WebClientContextFactory(),
                  pool=self.ssl and None or pnconn_pool)),
        [('gzip', GzipDecoder)])

    request = agent.request('GET', url, Headers(self.headers), None)

    def received(response):
        finished = Deferred()
        response.deliverBody(PubNubResponse(finished))
        return finished

    def complete(data):
        callback(eval(data))

    request.addCallback(received)
    request.addBoth(complete)
def request_gzipped_url(url, callback, errback=None, timeout=None, **kwargs):
    '''Get URL with gzip-decoder support.'''
    agent = ContentDecoderAgent(Agent(reactor), [('gzip', GzipDecoder)])
    d = agent.request('GET', url, Headers({'User-Agent': ['gzip']}))

    def handleResponse(response, **kwargs):
        receiverDeferred = Deferred()
        receiverDeferred.addCallback(callback, **kwargs)
        receiver = StringReceiver(receiverDeferred)
        response.deliverBody(receiver)

    if timeout:
        timeoutCall = reactor.callLater(timeout, d.cancel)

        def completed(passthrough):
            if timeoutCall.active():
                timeoutCall.cancel()
            return passthrough

        d.addBoth(completed)

    d.addCallback(handleResponse, **kwargs)
    if errback:
        d.addErrback(errback, **kwargs)
    return d
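# A hypothetical usage sketch for request_gzipped_url above (the URL and
# both callbacks are illustrative, not part of the original snippet):
# fetch a gzip-encoded page, report how many bytes arrived, then stop.
def print_length(body):
    print('received %d bytes' % len(body))
    reactor.stop()

def print_error(failure):
    print('request failed: %s' % failure)
    reactor.stop()

# request_gzipped_url('http://httpbin.org/gzip', print_length,
#                     errback=print_error, timeout=10)
# reactor.run()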
def build_agent(req):
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        if uri.scheme == 'https':
            agent = TunnelingAgent(
                reactor=reactor, proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(reactor, host=proxy.host,
                                          port=proxy.port,
                                          timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)
        if proxy.auth_header:
            req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])

    return agent
def __init__(self, reactor, email, password):
    self.reactor = reactor
    self.email = email
    self.password = password

    # Set up an agent for sending HTTP requests. Uses cookies
    # (part of the authentication), a persistent HTTP connection
    # pool, and automatic content decoding (gzip).

    # container to keep track of cookies
    self.cookiejar = cookielib.CookieJar()

    # HTTP persistent connection pool
    self.pool = HTTPConnectionPool(self.reactor, persistent=True)
    # for some reason, using >1 connection per host fails
    self.pool.maxPersistentPerHost = 1

    self.agent = ContentDecoderAgent(
        CookieAgent(Agent(self.reactor, pool=self.pool), self.cookiejar),
        [('gzip', GzipDecoder)])

    # this is the token that is used to authenticate API requests
    self.xsrf_token = None
    self.auth_token = None

    # who we are
    self.player_nickname = None
    self.player_guid = None
    self.team = None
    self.ap = None
    self.level = None
    self.start_date = None
    self.new_version = False
    self.inventory_done = False
    self.profile_done = False

    # for keeping track of item inventory
    self.inventory = b07.inventory.Inventory()

    # for keeping track of API requests that are delayed until
    # authentication has completed
    self._deferred_api_requests = []

    # for keeping track of periodic inventory refreshes
    self._periodic_inventory_refresh_delayedcall = None

    # list of functions to call every time inventory is refreshed
    self._on_inventory_refreshed = []

    # do an immediate inventory refresh
    self._first_inventory_ready = self._defer_until_authenticated(
        self._inventory0, (), {})

    # do an immediate profile refresh
    self._first_profile_ready = self._defer_until_authenticated(
        self._profile0, (), {})

    # start the authentication process
    self.reactor.callLater(0, self._authenticate0)
def main():
    agent = ContentDecoderAgent(Agent(reactor), [(b'gzip', GzipDecoder)])

    d = agent.request(b'GET', b'http://httpbin.org/gzip')
    d.addCallback(printBody)
    d.addErrback(log.err)
    d.addCallback(lambda ignored: reactor.stop())

    reactor.run()
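# printBody is not defined in this example (or the next one); a minimal
# sketch, assuming twisted.web.client.readBody is available, that collects
# the already gzip-decoded body and prints it:
from twisted.web.client import readBody

def printBody(response):
    d = readBody(response)
    d.addCallback(print)
    return d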
def main():
    agent = ContentDecoderAgent(Agent(reactor), [('gzip', GzipDecoder)])

    d = agent.request('GET', 'http://www.yahoo.com/')
    d.addCallback(printBody)
    d.addErrback(log.err)
    d.addCallback(lambda ignored: reactor.stop())

    reactor.run()
def __init__(self, hs):
    SimpleHttpClient.__init__(self, hs)
    # clobber the base class's agent and UA:
    self.agent = ContentDecoderAgent(
        BrowserLikeRedirectAgent(
            Agent.usingEndpointFactory(reactor, SpiderEndpointFactory(hs))),
        [(b'gzip', GzipDecoder)])
def _create_agent(self, req):
    """create the right agent for a specific request"""
    agent = None
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        if uri.scheme == 'https':
            agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                agent = TunnelingAgent(
                    reactor=reactor, proxy=proxy,
                    contextFactory=ScrapexClientContextFactory(),
                    connectTimeout=30, pool=self._pool)
                self._agents[agent_key] = agent
        else:
            # http
            agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                endpoint = TCP4ClientEndpoint(reactor, host=proxy.host,
                                              port=proxy.port,
                                              timeout=req.get('timeout'))
                agent = ProxyAgent(endpoint, pool=self._pool)
                self._agents[agent_key] = agent

        if proxy.auth_header:
            req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        # use a single agent when no proxy is used
        agent = self._direct_agent

    redirectLimit = self.scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3
    if redirectLimit > 0:
        agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        agent = CookieAgent(agent, self.cj)

    return agent
def __init__(self, *argz, **kwz):
    super(txOneDriveAPI, self).__init__(*argz, **kwz)

    pool = self.request_pool = QuietHTTPConnectionPool(
        reactor, debug_requests=self.debug_requests,
        **self.request_pool_options)

    self.request_agent = ContentDecoderAgent(
        RedirectAgent(
            Agent(reactor, TLSContextFactory(self.ca_certs_files), pool=pool)),
        [('gzip', GzipDecoder)])
def call(self, action, params=None, callback=None):
    if params is None:
        params = StalkerRequest.getDefaults(action)
    headers = StalkerRequest.getHeaders(self._identity, action,
                                        referer=self.baseurl)
    headers["X-User-Agent"] = ["Model: MAG250; Link: WiFi"]
    url = "%s%s?%s" % (self.baseurl, DEFAULT_ENDPOINT, urlencode(params))
    Log.w(url)
    agent = ContentDecoderAgent(Agent(reactor), [('gzip', GzipDecoder)])

    def bodyCB(body):
        if isinstance(body, Failure):
            if isinstance(body.value, PartialDownloadError):
                body = body.value.response
            else:
                Log.w(body)
                callback(None)
                return
        try:
            result = json.loads(unicode(body))
            Log.d(result)
            callback(result)
        except Exception as e:
            Log.w(body)
            callback(None)

    def bodyErrorCB(error=None):
        Log.w(error)

    def responseCB(response):
        d = readBody(response)
        d.addBoth(bodyCB)

    def errorCB(error=None):
        if isinstance(error, PartialDownloadError):
            responseCB(error.response)
            return
        Log.w(error)

    d = agent.request('GET', url, Headers(headers))
    d.addCallback(responseCB)
    d.addErrback(errorCB)
def __init__(self):
    self.pool = HTTPConnectionPool(reactor, persistent=True)
    self.pool.maxPersistentPerHost = 5      # default is 2 connections per host
    self.pool.cachedConnectionTimeout = 50  # default is 240 seconds

    contextFactory = WebClientContextFactory()
    raw_agent = Agent(reactor, contextFactory, pool=self.pool)
    agent = RedirectAgent(
        ContentDecoderAgent(raw_agent, [('gzip', GzipDecoder)]))
    self.cookieJar = CookieJar()
    self.agent = CookieAgent(agent, self.cookieJar)

    self.headers = {
        'User-agent': ['Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
                       'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'],
        'Accept-Language': ['zh-Hans-CN,zh-Hans;q=0.5'],
        'Accept': ['text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'],
        'Accept-Encoding': ['gb2313,utf-8;q=0.7,*;q=0.7'],
        'Cache-Control': ['max-age=0'],
    }
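# A hypothetical usage sketch for the client above (the fetch method and
# readBody import are assumptions, not part of the original snippet). Note
# the stacking order in this example: RedirectAgent wraps
# ContentDecoderAgent, so bodies are decoded before redirect handling;
# most other examples in this collection nest the two the other way around.
def fetch(self, url):
    from twisted.web.client import readBody
    d = self.agent.request('GET', url, Headers(self.headers), None)
    d.addCallback(readBody)  # fires with the decoded response body
    return d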
def get(url, data=None, on_response=None, on_error=None):
    errback = on_error or make_errback(frames_back=2)
    try:
        def handle_response(response):
            if response.code == 200:
                response.deliverBody(JsonReceiver.create(on_response, errback))
            else:
                errback('returned %s' % response.code)

        agent = ContentDecoderAgent(Agent(reactor, pool=pool),
                                    [('gzip', GzipDecoder)])
        headers = Headers(get_auth_headers())
        headers.addRawHeader('User-Agent', 'gzip')
        d = agent.request('GET', url, headers=headers,
                          bodyProducer=JsonProducer(data) if data else None)
        d.addCallbacks(handle_response, errback)
    except Exception as ex:
        errback('error %s' % ex)
def __init__(self, **config):
    for k, v in config.viewitems():
        try:
            x = getattr(self, k)
        except AttributeError:
            raise AttributeError('Unrecognized configuration key: {}'.format(k))
        if isinstance(x, Mapping) and isinstance(v, Mapping):
            v = AttrDict(v)
            v.rebase(AttrDict(x))
        setattr(self, k, v)

    pool = QuietHTTPConnectionPool(reactor, persistent=True)
    for k, v in self.request_pool_options.viewitems():
        getattr(pool, k)  # to somewhat protect against typos
        setattr(pool, k, v)

    self.request_agent = ContentDecoderAgent(
        RedirectAgent(
            Agent(reactor, TLSContextFactory(self.ca_certs_files), pool=pool)),
        [('gzip', GzipDecoder)])
def __init__(self, url, callID=0, maxPersistentPerHost=2,
             useCompression=False, connectTimeout=None):
    self.url = url
    self.connectTimeout = connectTimeout
    self.encoder = self.get_encoder()
    assert IEncoder.providedBy(self.encoder), \
        'no encoder available or encoder does not provide IEncoder'
    assert isinstance(callID, (int, long)), \
        "callID must be <type 'int'> or <type 'long'>"
    self.__callID = callID
    self.__callsCounter = 0

    if maxPersistentPerHost > 0:
        self.pool = HTTPConnectionPool(reactor, persistent=True)
        self.pool.maxPersistentPerHost = maxPersistentPerHost
    else:
        self.pool = None

    agent = Agent(reactor, connectTimeout=self.connectTimeout, pool=self.pool)
    if useCompression:
        self.agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    else:
        self.agent = agent
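# A hypothetical construction sketch for the class above (its name is not
# shown in the snippet; "Proxy" and the URL are illustrative): opt in to
# gzip decoding of responses and keep up to 4 persistent connections per
# host. Passing maxPersistentPerHost=0 disables connection pooling entirely.
# proxy = Proxy('http://127.0.0.1:7080/rpc', maxPersistentPerHost=4,
#               useCompression=True)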
def run(self):
    # convert the tuple to JSON
    body = self._postTuple.tupleToRestfulJsonDict()
    body = json.dumps(body).encode("utf-8")

    # add HTTP headers
    headers = Headers({
        "User-Agent": ["synerty/1.0"],
        "Content-Type": ["application/json"]
    })

    # Add the gzip decoder
    agent = ContentDecoderAgent(Agent(reactor), [(b"gzip", GzipDecoder)])

    # Make the web request
    response = yield agent.request(self._httpMethod, self._url, headers,
                                   _BytesProducer(body))

    # Get the response data
    responseData = yield self._cbResponse(response)

    # Convert the bytes into a tuple and return
    return self._parseTuple(responseData)
def request(
    self,
    method,
    url,
    *,
    params=None,
    headers=None,
    data=None,
    files=None,
    json=_NOTHING,
    auth=None,
    cookies=None,
    allow_redirects=True,
    browser_like_redirects=False,
    unbuffered=False,
    reactor=None,
    timeout=None,
    _stacklevel=2,
):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()

    if isinstance(url, DecodedURL):
        parsed_url = url.encoded_url
    elif isinstance(url, EncodedURL):
        parsed_url = url
    elif isinstance(url, str):
        # We use hyperlink in lazy mode so that users can pass arbitrary
        # bytes in the path and querystring.
        parsed_url = EncodedURL.from_text(url)
    else:
        parsed_url = EncodedURL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    headers = self._request_headers(headers, _stacklevel + 1)

    bodyProducer, contentType = self._request_body(
        data, files, json, stacklevel=_stacklevel + 1)
    if contentType is not None:
        headers.setRawHeaders(b'Content-Type', [contentType])

    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)

    cookies = merge_cookies(self._cookiejar, cookies)
    wrapped_agent = CookieAgent(self._agent, cookies)

    if allow_redirects:
        if browser_like_redirects:
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                        [(b'gzip', GzipDecoder)])

    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers,
                              bodyProducer=bodyProducer)

    if reactor is None:
        from twisted.internet import reactor
    if timeout:
        delayedCall = reactor.callLater(timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not unbuffered:
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)
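# A hypothetical usage sketch for the treq-style client above (client
# construction is elided; names and URL are illustrative). The wrapping
# order means the cookie jar sits closest to the underlying agent,
# redirects are handled next, gzip decoding after that, and HTTP auth
# outermost:
# d = client.request('GET', 'https://example.com/search',
#                    params={'q': 'twisted'}, timeout=30)
# d.addCallback(lambda response: response.code)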
def __init__(self):
    self.agent = ContentDecoderAgent(RedirectAgent(Agent(reactor)),
                                     [(b'gzip', GzipDecoder)])
def __init__(self, *args, **kwargs):
    super(Command, self).__init__(*args, **kwargs)
    self.agent = ContentDecoderAgent(Agent(reactor),
                                     [(b'gzip', GzipDecoder)])
    self.deferred_results = []
def request(self, method, url, **kwargs):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()
    stacklevel = kwargs.pop('_stacklevel', 2)

    if isinstance(url, DecodedURL):
        parsed_url = url
    elif isinstance(url, EncodedURL):
        parsed_url = DecodedURL(url)
    elif isinstance(url, six.text_type):
        parsed_url = DecodedURL.from_text(url)
    else:
        parsed_url = DecodedURL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.pop('params', None)
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.pop('headers', None)
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.items():
                if isinstance(v, (bytes, six.text_type)):
                    h.addRawHeader(k, v)
                elif isinstance(v, list):
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    bodyProducer, contentType = self._request_body(
        data=kwargs.pop('data', None),
        files=kwargs.pop('files', None),
        json=kwargs.pop('json', _NOTHING),
        stacklevel=stacklevel,
    )
    if contentType is not None:
        headers.setRawHeaders(b'Content-Type', [contentType])

    cookies = kwargs.pop('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)

    cookies = merge_cookies(self._cookiejar, cookies)
    wrapped_agent = CookieAgent(self._agent, cookies)

    browser_like_redirects = kwargs.pop('browser_like_redirects', False)
    if kwargs.pop('allow_redirects', True):
        if browser_like_redirects:
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                        [(b'gzip', GzipDecoder)])

    auth = kwargs.pop('auth', None)
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers,
                              bodyProducer=bodyProducer)

    reactor = kwargs.pop('reactor', None)
    if reactor is None:
        from twisted.internet import reactor
    timeout = kwargs.pop('timeout', None)
    if timeout:
        delayedCall = reactor.callLater(timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.pop('unbuffered', False):
        d.addCallback(_BufferedResponse)

    if kwargs:
        warnings.warn(
            ("Got unexpected keyword argument: {}."
             " treq will ignore this argument,"
             " but will raise TypeError in the next treq release.").format(
                ", ".join(repr(k) for k in kwargs)),
            DeprecationWarning,
            stacklevel=stacklevel,
        )

    return d.addCallback(_Response, cookies)
def request(self, method, url, **kwargs):
    method = method.upper()

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.get('params')
    if params:
        url = _combine_query_params(url, params)

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.get('headers')
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.iteritems():
                if isinstance(v, str):
                    h.addRawHeader(k, v)
                else:
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    # Here we choose the right producer
    # based on the parameters passed in.
    bodyProducer = None
    data = kwargs.get('data')
    files = kwargs.get('files')
    if files:
        # If the files keyword is present we will issue a
        # multipart/form-data request, as it suits better for cases
        # with files and/or large objects.
        files = list(_convert_files(files))
        boundary = uuid.uuid4()
        headers.setRawHeaders(
            'content-type',
            ['multipart/form-data; boundary=%s' % (boundary,)])
        if data:
            data = _convert_params(data)
        else:
            data = []
        bodyProducer = multipart.MultiPartProducer(data + files,
                                                   boundary=boundary)
    elif data:
        # Otherwise stick to x-www-form-urlencoded format,
        # as it's generally faster for smaller requests.
        if isinstance(data, (dict, list, tuple)):
            headers.setRawHeaders(
                'content-type', ['application/x-www-form-urlencoded'])
            data = urlencode(data, doseq=True)
        bodyProducer = IBodyProducer(data)

    cookies = kwargs.get('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)

    cookies = merge_cookies(self._cookiejar, cookies)
    wrapped_agent = CookieAgent(self._agent, cookies)

    if kwargs.get('allow_redirects', True):
        wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                        [('gzip', GzipDecoder)])

    auth = kwargs.get('auth')
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers,
                              bodyProducer=bodyProducer)

    timeout = kwargs.get('timeout')
    if timeout:
        delayedCall = default_reactor(kwargs.get('reactor')).callLater(
            timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.get('unbuffered', False):
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)
def _setContentDecoder(self, proxy):
    proxy.agent = ContentDecoderAgent(proxy.agent, [('gzip', GzipDecoder)])
def test_EncodingJSONRPCServer(self):
    DATA = {'foo': 'bar'}
    REQUEST = '{"jsonrpc": "2.0", "method": "test", "params": [], "id": 1}'
    RESPONSE = ('{"jsonrpc": "2.0", "id": 1, "result": ' +
                json.dumps(DATA) + '}')

    class RPCServer(JSONRPCServer):
        def jsonrpc_test(self):
            return defer.succeed(DATA)

    class ReceiverProtocol(Protocol):
        def __init__(self, finished):
            self.finished = finished
            self.body = []

        def dataReceived(self, bytes):
            self.body.append(bytes)

        def connectionLost(self, reason):
            self.finished.callback(b''.join(self.body))

    @implementer(IBodyProducer)
    class StringProducer(object):
        def __init__(self, body):
            self.body = body
            self.length = len(body)

        def startProducing(self, consumer):
            consumer.write(self.body.encode())
            return defer.succeed(None)

        def pauseProducing(self):
            pass

        def stopProducing(self):
            pass

    server = RPCServer()
    resource = EncodingJSONRPCServer(server)

    site = Site(resource)
    port = reactor.listenTCP(8888, site, interface='127.0.0.1')

    agent = ContentDecoderAgent(Agent(reactor), [(b'gzip', GzipDecoder)])

    response = yield agent.request(b'POST', b'http://127.0.0.1:8888',
                                   Headers({'Accept-Encoding': ['gzip']}),
                                   StringProducer(REQUEST))

    self.assertTrue(isinstance(response, GzipDecoder))

    finished = defer.Deferred()
    response.deliverBody(ReceiverProtocol(finished))
    data = yield finished

    self.assert_json(data, RESPONSE)

    port.stopListening()
def request(self, method, url, **kwargs):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()

    if isinstance(url, unicode):
        parsed_url = URL.from_text(url)
    else:
        parsed_url = URL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.get('params')
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.get('headers')
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.items():
                if isinstance(v, (bytes, unicode)):
                    h.addRawHeader(k, v)
                elif isinstance(v, list):
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    # Here we choose the right producer
    # based on the parameters passed in.
    bodyProducer = None
    data = kwargs.get('data')
    files = kwargs.get('files')
    # since json=None needs to be serialized as 'null', we need to
    # explicitly check kwargs for this key
    has_json = 'json' in kwargs

    if files:
        # If the files keyword is present we will issue a
        # multipart/form-data request, as it suits better for cases
        # with files and/or large objects.
        files = list(_convert_files(files))
        boundary = str(uuid.uuid4()).encode('ascii')
        headers.setRawHeaders(
            b'content-type',
            [b'multipart/form-data; boundary=' + boundary])
        if data:
            data = _convert_params(data)
        else:
            data = []

        bodyProducer = multipart.MultiPartProducer(data + files,
                                                   boundary=boundary)
    elif data:
        # Otherwise stick to x-www-form-urlencoded format,
        # as it's generally faster for smaller requests.
        if isinstance(data, (dict, list, tuple)):
            headers.setRawHeaders(
                b'content-type',
                [b'application/x-www-form-urlencoded'])
            data = urlencode(data, doseq=True)
        bodyProducer = self._data_to_body_producer(data)
    elif has_json:
        # If data is sent as json, set Content-Type as 'application/json'
        headers.setRawHeaders(
            b'content-type',
            [b'application/json; charset=UTF-8'])
        content = kwargs['json']
        json = json_dumps(content, separators=(u',', u':')).encode('utf-8')
        bodyProducer = self._data_to_body_producer(json)

    cookies = kwargs.get('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)

    cookies = merge_cookies(self._cookiejar, cookies)
    wrapped_agent = CookieAgent(self._agent, cookies)

    if kwargs.get('allow_redirects', True):
        if kwargs.get('browser_like_redirects', False):
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                        [(b'gzip', GzipDecoder)])

    auth = kwargs.get('auth')
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers,
                              bodyProducer=bodyProducer)

    timeout = kwargs.get('timeout')
    if timeout:
        delayedCall = default_reactor(kwargs.get('reactor')).callLater(
            timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.get('unbuffered', False):
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)
def http_request(self, url, http_request_headers, include_http_responses=False):
    key = url + json.dumps(http_request_headers)
    cached_value = yield self.lookup('http_request', key)
    if cached_value is not None:
        if include_http_responses is not True:
            cached_value.pop('responses', None)
        defer.returnValue(cached_value)

    page_info = {
        'body_length': -1,
        'status_code': -1,
        'headers': {},
        'failure': None
    }

    agent = ContentDecoderAgent(
        FixedRedirectAgent(TrueHeadersAgent(reactor),
                           ignorePrivateRedirects=True),
        [('gzip', GzipDecoder)]
    )
    try:
        retries = 0
        while True:
            try:
                response = yield agent.request(
                    'GET', url, TrueHeaders(http_request_headers))
                headers = {}
                for name, value in response.headers.getAllRawHeaders():
                    headers[name] = unicode(value[0], errors='ignore')
                body_length = -1
                body = None
                try:
                    body = yield readBody(response)
                    body_length = len(body)
                except PartialDownloadError as pde:
                    if pde.response:
                        body_length = len(pde.response)
                        body = pde.response
                page_info['body_length'] = body_length
                page_info['status_code'] = response.code
                page_info['headers'] = headers
                page_info['title'] = extractTitle(body)
                response.body = body
                page_info['responses'] = encodeResponses(response)
                break
            except Exception:
                if retries > self.http_retries:
                    raise
                retries += 1
    except DNSLookupError:
        page_info['failure'] = 'dns_lookup_error'
    except TimeoutError:
        page_info['failure'] = 'generic_timeout_error'
    except ConnectionRefusedError:
        page_info['failure'] = 'connection_refused_error'
    except ConnectError:
        page_info['failure'] = 'connect_error'
    except Exception as exc:
        # XXX map more failures
        page_info['failure'] = 'unknown_error'
        log.err("Unknown error occurred")
        log.exception(exc)

    yield self.cache_value('http_request', key, page_info)

    if include_http_responses is not True:
        page_info.pop('responses', None)

    defer.returnValue(page_info)