def test_setdefault(self):
    '''setdefault stores a copy of a list default and returns the stored list.'''
    headers = Headers()
    values = ['ip1', 'ip2']
    stored = headers.setdefault('X-Forwarded-For', values)
    # The headers object keeps its own list, not the caller's object ...
    self.assertIsNot(headers.getlist('X-Forwarded-For'), values)
    # ... and the return value is exactly that internal list.
    self.assertIs(headers.getlist('X-Forwarded-For'), stored)

    headers = Headers()
    stored = headers.setdefault('X-Forwarded-For', 'ip1')
    # A scalar default ends up wrapped in a single-element list.
    self.assertEqual(headers.getlist('X-Forwarded-For'), ['ip1'])
    self.assertIs(headers.getlist('X-Forwarded-For'), stored)
Example no. 2
0
    def test_setdefault(self):
        '''Verify Headers.setdefault copy semantics and its return value.'''
        hdrs = Headers()
        default_list = ['ip1', 'ip2']
        returned = hdrs.setdefault('X-Forwarded-For', default_list)
        # The supplied list is copied rather than stored by reference,
        # while the returned object is the list held inside the headers.
        self.assertIsNot(hdrs.getlist('X-Forwarded-For'), default_list)
        self.assertIs(hdrs.getlist('X-Forwarded-For'), returned)

        hdrs = Headers()
        returned = hdrs.setdefault('X-Forwarded-For', 'ip1')
        # A plain string default is normalized to a one-item list.
        self.assertEqual(hdrs.getlist('X-Forwarded-For'), ['ip1'])
        self.assertIs(hdrs.getlist('X-Forwarded-For'), returned)
Example no. 3
0
class CrawlmiHTPPClientFactory(HTTPClientFactory):
    '''Factory driving a single HTTP download; fires `deferred` with a Response.

    NOTE(review): "HTPP" in the class name looks like a typo for "HTTP",
    but renaming would break external callers, so it is kept as-is.
    '''

    protocol = CrawlmiHTTPClient
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180, download_size=0):
        # Strip any #fragment from the requested url.
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.start_time = time()
        self.deferred = defer.Deferred()
        self.deferred.addCallback(self._build_response, request)
        self.invalid_headers = []
        self.timeout = timeout
        self.download_size = download_size

        # Twisted 11.1.0+ expects HTTPClientFactory instances to carry a
        # _disconnectedDeferred attribute (see Twisted r32329). Crawlmi
        # implements its own redirect handling, so the _waitForDisconnect
        # callback is not attached; having the attribute alone avoids an
        # AttributeError inside clientConnectionFailed.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # Derive the Host header from the parsed url unless already given.
        self.headers.setdefault('Host', self.netloc)

        # Announce the body length and defensively close the connection in
        # case a broken http/1.1 peer tries to keep it alive.
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
            self.headers.setdefault('Connection', 'close')

    def _build_response(self, body, request):
        '''Deferred callback: assemble the final response object.'''
        if self.invalid_headers:
            raise BadHttpHeaderError('Invalid headers received: %s' %
                                     self.invalid_headers)

        cls = resp_factory.from_args(headers=self.response_headers,
                                     url=self.url)
        result = cls(url=self.url, status=self.status,
                     headers=self.response_headers, body=body,
                     request=request)
        # Latency is measured from construction until headers arrived.
        result.download_latency = self.headers_time - self.start_time
        return result

    def _set_connection_attributes(self, request):
        '''Fill scheme/netloc/host/port/path, routing via a proxy if set.'''
        (self.scheme, self.netloc, self.host,
         self.port, self.path) = _parse_url_args(request.url)
        if request.proxy:
            self.scheme, _, self.host, self.port, _ = \
                _parse_url_args(request.proxy)
            # When proxying, the request line carries the absolute url.
            self.path = self.url

    def gotStatus(self, version, status, message):
        '''Record the status line of the response.'''
        self.version = version
        self.status = int(status)
        self.message = message

    def gotHeaders(self, headers):
        '''Record response headers and the time they were received.'''
        self.headers_time = time()
        self.response_headers = headers
Example no. 4
0
class CrawlmiHTPPClientFactory(HTTPClientFactory):
    '''HTTP client factory for one crawlmi request.

    Its `deferred` eventually fires with the built response. (The "HTPP"
    spelling appears to be a typo for "HTTP" but is preserved because
    callers reference the class by this name.)
    '''

    protocol = CrawlmiHTTPClient
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180, download_size=0):
        self.url = urldefrag(request.url)[0]  # drop the url fragment
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.start_time = time()
        self.deferred = defer.Deferred()
        self.deferred.addCallback(self._build_response, request)
        self.invalid_headers = []
        self.timeout = timeout
        self.download_size = download_size

        # HTTPClientFactory in Twisted 11.1.0+ is expected to own a
        # _disconnectedDeferred (Twisted r32329). Since crawlmi performs
        # redirect handling itself, _waitForDisconnect is deliberately not
        # added as a callback; the bare attribute is enough to keep
        # clientConnectionFailed from raising AttributeError.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # Default the Host header from the url's network location.
        self.headers.setdefault('Host', self.netloc)

        if self.body is not None:
            # Set Content-Length from the body, and ask to close the
            # connection in case a broken http/1.1 peer keeps it alive.
            self.headers['Content-Length'] = len(self.body)
            self.headers.setdefault('Connection', 'close')

    def _build_response(self, body, request):
        '''Turn the downloaded body into a response object.'''
        if self.invalid_headers:
            raise BadHttpHeaderError('Invalid headers received: %s' %
                                     self.invalid_headers)

        response_cls = resp_factory.from_args(headers=self.response_headers,
                                              url=self.url)
        response = response_cls(url=self.url,
                                status=self.status,
                                headers=self.response_headers,
                                body=body,
                                request=request)
        # Time between factory construction and header arrival.
        response.download_latency = self.headers_time - self.start_time
        return response

    def _set_connection_attributes(self, request):
        '''Parse connection details from the url (or the proxy, if any).'''
        parsed = _parse_url_args(request.url)
        self.scheme, self.netloc, self.host, self.port, self.path = parsed
        if request.proxy:
            self.scheme, _, self.host, self.port, _ = \
                _parse_url_args(request.proxy)
            # Proxied requests send the absolute url as the request path.
            self.path = self.url

    def gotStatus(self, version, status, message):
        '''Capture the response status line; status is coerced to int.'''
        self.version, self.status, self.message = version, int(status), message

    def gotHeaders(self, headers):
        '''Stamp header arrival time and keep the received headers.'''
        self.headers_time = time()
        self.response_headers = headers