class DecompressionMiddlewareTest(TestCase):
    """Exercise ``DecompressionMiddleware.process_response``.

    The class-level fixtures come from ``_test_data``: the expected
    uncompressed body plus one prepared response per entry in
    ``test_formats``.
    """

    test_formats = ['tar', 'xml.bz2', 'xml.gz', 'zip']
    uncompressed_body, test_responses = _test_data(test_formats)

    def setUp(self):
        self.spider = Spider('foo')
        self.mw = DecompressionMiddleware()

    def tearDown(self):
        del self.mw

    def test_known_compression_formats(self):
        # Every supported format must come back as an XmlResponse whose
        # body matches the uncompressed reference.
        for fmt in self.test_formats:
            packed = self.test_responses[fmt]
            unpacked = self.mw.process_response(None, packed, self.spider)
            failure = 'Failed %s, response type %s' % (
                fmt, type(unpacked).__name__)
            assert isinstance(unpacked, XmlResponse), failure
            assert_samelines(self, unpacked.body, self.uncompressed_body, fmt)

    def test_plain_response(self):
        # A response that is not an archive must be returned as the very
        # same object.
        original = Response(url='http://test.com', body=self.uncompressed_body)
        result = self.mw.process_response(None, original, self.spider)
        assert result is original
        assert_samelines(self, result.body, original.body)

    def test_empty_response(self):
        # An empty body must pass through as the same object and stay empty.
        original = Response(url='http://test.com', body=b'')
        result = self.mw.process_response(None, original, self.spider)
        assert result is original
        assert not original.body
        assert not result.body
class DecompressionMiddlewareTest(TestCase):
    """Tests for ``DecompressionMiddleware.process_response``.

    ``_test_data`` supplies the expected uncompressed body and one prepared
    response per format listed in ``test_formats``.
    """

    test_formats = ['tar', 'xml.bz2', 'xml.gz', 'zip']
    uncompressed_body, test_responses = _test_data(test_formats)

    def setUp(self):
        self.mw = DecompressionMiddleware()
        self.spider = Spider('foo')

    def test_known_compression_formats(self):
        # Each supported format must yield an XmlResponse whose body equals
        # the uncompressed reference body.
        for fmt in self.test_formats:
            rsp = self.test_responses[fmt]
            new = self.mw.process_response(None, rsp, self.spider)
            assert isinstance(new, XmlResponse), \
                'Failed %s, response type %s' % (fmt, type(new).__name__)
            assert_samelines(self, new.body, self.uncompressed_body, fmt)

    def test_plain_response(self):
        # A non-archive response must be handed back as the same object.
        rsp = Response(url='http://test.com', body=self.uncompressed_body)
        new = self.mw.process_response(None, rsp, self.spider)
        assert new is rsp
        assert_samelines(self, new.body, rsp.body)

    def test_empty_response(self):
        # The body must be a bytes literal: on Python 3 a text string is not
        # a valid Response body, and b'' is equivalent to '' on Python 2.
        rsp = Response(url='http://test.com', body=b'')
        new = self.mw.process_response(None, rsp, self.spider)
        assert new is rsp
        assert not rsp.body
        assert not new.body

    def tearDown(self):
        del self.mw
Exemple #3
0
    def writeResponse(self, sresponse):
        """Decompress ``sresponse`` and send it out through ``self.request``.

        The response is run through ``HttpCompressionMiddleware`` and then
        ``DecompressionMiddleware`` (no spider is passed to either). Text
        responses are rendered with ``self.prepare_response_str``; anything
        else (images and such) is written out as the raw body.

        :param sresponse: the response to post-process and forward.
        """
        decoded = HttpCompressionMiddleware().process_response(
            Request(sresponse.url), sresponse, None)
        decoded = DecompressionMiddleware().process_response(
            None, decoded, None)

        out_headers = self.prepare_response_headers(decoded.headers)

        if not isinstance(decoded, TextResponse):
            # images and such: forward the body untouched
            payload = decoded.body
        else:
            # Prefer the 'x-real-ip' header value; fall back to the
            # connection's host when that header is missing or empty.
            client_ip = (self.request.getHeader('x-real-ip')
                         or self.request.client.host)
            # NOTE(review): body_as_unicode() is deprecated in newer Scrapy
            # releases in favour of `.text` - confirm the targeted version.
            payload = self.prepare_response_str(
                decoded.selector,
                decoded.headers,
                decoded.body_as_unicode(),
                decoded.url,
                client_ip,
            )
            if self.feed_config:
                # With a feed config, the prepared headers are replaced
                # by a single XML content type.
                out_headers = {b"Content-Type": b'text/xml; charset=utf-8'}

        for name, value in out_headers.items():
            self.request.setHeader(name, value)

        self.request.write(payload)
        self.request.finish()
Exemple #4
0
def downloadDone(response_str, request, response, feed_config):
    """Post-process a finished download and write the result to ``request``.

    :param response_str: the downloaded body; written out unchanged when the
        decompressed response is not a ``TextResponse``.
    :param request: the outgoing request object; it must provide
        ``setHeader``, ``write`` and ``finish``.
    :param response: the download response; its ``request.absoluteURI`` is
        used as the page URL.
    :param feed_config: when truthy, a feed is built from the page instead
        of the cleaned-up page itself.
    """
    url = response.request.absoluteURI

    # Parenthesised single-argument form: valid as a Python 3 function call
    # and prints the same text under the Python 2 print statement.
    print('Response <%s> ready (%s bytes)' % (url, len(response_str)))
    response = buildScrapyResponse(response, response_str, url)

    response = HttpCompressionMiddleware().process_response(Request(url), response, None)
    response = DecompressionMiddleware().process_response(None, response, None)

    if isinstance(response, TextResponse):
        if feed_config:
            response_str = buildFeed(response, feed_config)
            request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
        else:
            # The helper returns a pair; only the first element (the page
            # body to send) is needed here.
            response_str, _ = setBaseAndRemoveScriptsAndMore(response, url)

    request.write(response_str)
    request.finish()

    periodical_garbage_collect()
Exemple #5
0
    def writeResponse(self, request, response, feed_config,
                      response_str='PolitePol: Local page processing is failed'):
        """Decompress ``response`` and write the outcome to ``request``.

        The response goes through ``HttpCompressionMiddleware`` and then
        ``DecompressionMiddleware`` (no spider is passed to either). When
        the result is a ``TextResponse``, ``response_str`` is replaced by
        the output of ``self.prepare_response_str``; otherwise the
        ``response_str`` given by the caller is written as is.

        :param request: outgoing request object; must provide ``getHeader``,
            ``setHeader``, ``write``, ``finish`` and ``client.host``.
        :param response: the response to post-process.
        :param feed_config: passed on to ``prepare_response_str``; when
            truthy the Content-Type header is set to XML.
        :param response_str: the text written when ``response`` is not a
            text response.
        """
        unpacked = HttpCompressionMiddleware().process_response(
            Request(response.url), response, None)
        unpacked = DecompressionMiddleware().process_response(
            None, unpacked, None)

        if isinstance(unpacked, TextResponse):
            # Prefer the 'x-real-ip' header value; fall back to the
            # connection's host when that header is missing or empty.
            client_ip = request.getHeader('x-real-ip') or request.client.host
            # NOTE(review): body_as_unicode() is deprecated in newer Scrapy
            # releases in favour of `.text` - confirm the targeted version.
            response_str = self.prepare_response_str(
                unpacked.selector,
                unpacked.headers,
                unpacked.body_as_unicode(),
                unpacked.url,
                feed_config,
                client_ip,
            )
            if feed_config:
                request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')

        request.write(response_str)
        request.finish()
 def setUp(self):
     """Create a fresh spider and middleware instance on ``self``."""
     self.spider = Spider('foo')
     self.mw = DecompressionMiddleware()
 def setUp(self):
     """Prepare the fixtures: a spider named 'foo' and a new middleware."""
     self.spider = Spider('foo')
     self.mw = DecompressionMiddleware()