def test_setdefault(self):
    """Check what ``Headers.setdefault`` stores and returns.

    With a list default, the stored list must be a different object from
    the list passed in, and the return value must be the stored list.
    With a plain string default, the value is stored as a one-item list
    and that stored list is returned.
    """
    header_name = 'X-Forwarded-For'

    # Default given as a list.
    headers = Headers()
    supplied = ['ip1', 'ip2']
    returned = headers.setdefault(header_name, supplied)
    self.assertIsNot(headers.getlist(header_name), supplied)
    self.assertIs(headers.getlist(header_name), returned)

    # Default given as a single string.
    headers = Headers()
    returned = headers.setdefault(header_name, 'ip1')
    self.assertEqual(headers.getlist(header_name), ['ip1'])
    self.assertIs(headers.getlist(header_name), returned)
class CrawlmiHTPPClientFactory(HTTPClientFactory):
    """Client factory that downloads one request and builds its response.

    ``self.deferred`` is chained to ``_build_response``, so the body it is
    fired with is turned into a response object for the given request.
    """

    protocol = CrawlmiHTTPClient
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180, download_size=0):
        """Copy the request data onto the factory and prepare the headers.

        ``request`` supplies ``url``, ``method``, ``body``, ``headers`` and
        ``proxy``; ``timeout`` and ``download_size`` are stored unchanged.
        """
        # Drop the fragment part of the url.
        self.url, _ = urldefrag(request.url)
        self.method = request.method
        # A falsy body (e.g. an empty string) is normalised to None.
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.start_time = time()

        # The body delivered through the deferred is converted to a response.
        response_deferred = defer.Deferred()
        response_deferred.addCallback(self._build_response, request)
        self.deferred = response_deferred

        self.invalid_headers = []
        self.timeout = timeout
        self.download_size = download_size

        # Twisted 11.1.0+ expects HTTPClientFactory to carry a
        # _disconnectedDeferred attribute (see Twisted r32329). Crawlmi
        # handles redirects itself, so the _waitForDisconnect callback is
        # not added; the attribute only has to exist so that
        # clientConnectionFailed does not raise AttributeError.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # Host defaults to the netloc parsed from the url.
        self.headers.setdefault('Host', self.netloc)

        if self.body is not None:
            # Content-Length always reflects the actual body length.
            self.headers['Content-Length'] = len(self.body)
            # just in case a broken http/1.1 decides to keep connection alive
            self.headers.setdefault('Connection', 'close')

    def _build_response(self, body, request):
        """Create the response object for ``request`` from ``body``.

        Raises ``BadHttpHeaderError`` when ``self.invalid_headers`` is not
        empty. The response class is picked by ``resp_factory.from_args``
        and ``download_latency`` is the time between construction of the
        factory and the arrival of the headers.
        """
        if self.invalid_headers:
            raise BadHttpHeaderError('Invalid headers received: %s' %
                                     self.invalid_headers)

        received_headers = self.response_headers
        response_cls = resp_factory.from_args(headers=received_headers,
                                              url=self.url)
        response = response_cls(
            url=self.url,
            status=self.status,
            headers=received_headers,
            body=body,
            request=request)
        response.download_latency = self.headers_time - self.start_time
        return response

    def _set_connection_attributes(self, request):
        """Set scheme, netloc, host, port and path from the request.

        When the request has a proxy, scheme, host and port are taken from
        the proxy url instead and the path becomes the full request url.
        """
        (self.scheme, self.netloc, self.host,
         self.port, self.path) = _parse_url_args(request.url)
        if request.proxy:
            (proxy_scheme, _, proxy_host,
             proxy_port, _) = _parse_url_args(request.proxy)
            self.scheme = proxy_scheme
            self.host = proxy_host
            self.port = proxy_port
            self.path = self.url

    def gotStatus(self, version, status, message):
        """Store the status line; ``status`` is converted to ``int``."""
        status_code = int(status)
        self.version = version
        self.status = status_code
        self.message = message

    def gotHeaders(self, headers):
        """Store the response headers and the time they were received."""
        self.headers_time = time()
        self.response_headers = headers
class CrawlmiHTPPClientFactory(HTTPClientFactory):
    """HTTP client factory whose deferred yields a response object.

    The callback ``_build_response`` is attached to ``self.deferred`` in
    the constructor, so firing the deferred with a body produces the
    response for the request the factory was created with.
    """

    protocol = CrawlmiHTTPClient
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180, download_size=0):
        """Initialise the factory state from ``request``.

        Reads ``url``, ``method``, ``body``, ``headers`` and ``proxy`` from
        the request and keeps ``timeout`` and ``download_size`` as given.
        """
        defragmented = urldefrag(request.url)
        self.url = defragmented[0]  # url without its fragment
        self.method = request.method
        self.body = request.body or None  # falsy bodies become None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.start_time = time()
        self.deferred = defer.Deferred()
        self.deferred.addCallback(self._build_response, request)
        self.invalid_headers = []
        self.timeout = timeout
        self.download_size = download_size

        # Needed for Twisted 11.1.0+, where HTTPClientFactory is expected
        # to have _disconnectedDeferred (see Twisted r32329). Redirects are
        # handled by Crawlmi's own logic, so the _waitForDisconnect
        # callback is not added. Having the attribute avoids an
        # AttributeError when clientConnectionFailed is called.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # set Host header based on url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based on the length of the body
        has_body = self.body is not None
        if has_body:
            self.headers['Content-Length'] = len(self.body)
            # just in case a broken http/1.1 decides to keep connection alive
            self.headers.setdefault('Connection', 'close')

    def _build_response(self, body, request):
        """Turn the downloaded ``body`` into a response for ``request``.

        Raises ``BadHttpHeaderError`` if any invalid headers were recorded
        in ``self.invalid_headers``. Otherwise the response class returned
        by ``resp_factory.from_args`` is instantiated and its
        ``download_latency`` is set to ``headers_time - start_time``.
        """
        if self.invalid_headers:
            message = 'Invalid headers received: %s' % self.invalid_headers
            raise BadHttpHeaderError(message)

        response_cls = resp_factory.from_args(
            headers=self.response_headers, url=self.url)
        response_kwargs = dict(
            url=self.url,
            status=self.status,
            headers=self.response_headers,
            body=body,
            request=request,
        )
        response = response_cls(**response_kwargs)
        response.download_latency = self.headers_time - self.start_time
        return response

    def _set_connection_attributes(self, request):
        """Derive the connection attributes from the request url and proxy.

        Scheme, netloc, host, port and path come from ``request.url``. If
        ``request.proxy`` is set, scheme, host and port are replaced by the
        proxy's values and the path is replaced by the full request url.
        """
        target = _parse_url_args(request.url)
        self.scheme, self.netloc, self.host, self.port, self.path = target
        if not request.proxy:
            return

        via_proxy = _parse_url_args(request.proxy)
        self.scheme, _, self.host, self.port, _ = via_proxy
        self.path = self.url

    def gotStatus(self, version, status, message):
        """Remember version, integer status code and message."""
        parsed = (version, int(status), message)
        self.version, self.status, self.message = parsed

    def gotHeaders(self, headers):
        """Remember the response headers and when they arrived."""
        self.headers_time = time()
        self.response_headers = headers