class DecompressionMiddlewareTest(TestCase):
    """Checks DecompressionMiddleware.process_response on archived, plain and empty bodies."""

    test_formats = ['tar', 'xml.bz2', 'xml.gz', 'zip']
    # Reference body plus the prepared responses, looked up by format name below.
    uncompressed_body, test_responses = _test_data(test_formats)

    def setUp(self):
        self.mw = DecompressionMiddleware()
        self.spider = Spider('foo')

    def tearDown(self):
        del self.mw

    def _run_middleware(self, response):
        """Pass *response* through the middleware with no request attached."""
        return self.mw.process_response(None, response, self.spider)

    def test_known_compression_formats(self):
        """Each prepared response must come back as an XmlResponse with the reference body."""
        for archive_format in self.test_formats:
            result = self._run_middleware(self.test_responses[archive_format])
            assert isinstance(result, XmlResponse), \
                'Failed %s, response type %s' % (archive_format, type(result).__name__)
            assert_samelines(self, result.body, self.uncompressed_body, archive_format)

    def test_plain_response(self):
        """A response carrying the reference body directly is returned as the same object."""
        original = Response(url='http://test.com', body=self.uncompressed_body)
        result = self._run_middleware(original)
        assert result is original
        assert_samelines(self, result.body, original.body)

    def test_empty_response(self):
        """An empty body is returned as the same object and stays empty."""
        original = Response(url='http://test.com', body=b'')
        result = self._run_middleware(original)
        assert result is original
        assert not original.body
        assert not result.body
class DecompressionMiddlewareTest(TestCase):
    """Tests for DecompressionMiddleware.process_response."""

    test_formats = ['tar', 'xml.bz2', 'xml.gz', 'zip']
    # Reference body plus the prepared responses, looked up by format name below.
    uncompressed_body, test_responses = _test_data(test_formats)

    def setUp(self):
        self.mw = DecompressionMiddleware()
        self.spider = Spider('foo')

    def test_known_compression_formats(self):
        """Every prepared response is turned into an XmlResponse with the reference body."""
        for fmt in self.test_formats:
            rsp = self.test_responses[fmt]
            new = self.mw.process_response(None, rsp, self.spider)
            assert isinstance(new, XmlResponse), \
                'Failed %s, response type %s' % (fmt, type(new).__name__)
            assert_samelines(self, new.body, self.uncompressed_body, fmt)

    def test_plain_response(self):
        """A response built from the reference body is handed back as the same object."""
        rsp = Response(url='http://test.com', body=self.uncompressed_body)
        new = self.mw.process_response(None, rsp, self.spider)
        assert new is rsp
        assert_samelines(self, new.body, rsp.body)

    def test_empty_response(self):
        """An empty body is handed back as the same object and stays empty."""
        # Bytes literal: Scrapy's Response rejects a str body on Python 3,
        # while b'' and '' are the same object type on Python 2.
        rsp = Response(url='http://test.com', body=b'')
        new = self.mw.process_response(None, rsp, self.spider)
        assert new is rsp
        assert not rsp.body
        assert not new.body

    def tearDown(self):
        del self.mw
def writeResponse(self, sresponse):
    """Decode *sresponse* and write it to ``self.request``, then finish the request.

    The response is first passed through HttpCompressionMiddleware and
    DecompressionMiddleware. A TextResponse is rendered with
    ``self.prepare_response_str``; any other response is written using its
    ``body`` unchanged. When ``self.feed_config`` is truthy, the headers of a
    text response are replaced by a single XML Content-Type header.
    """
    http_decoder = HttpCompressionMiddleware()
    sresponse = http_decoder.process_response(Request(sresponse.url), sresponse, None)
    archive_decoder = DecompressionMiddleware()
    sresponse = archive_decoder.process_response(None, sresponse, None)

    headers_out = self.prepare_response_headers(sresponse.headers)

    if not isinstance(sresponse, TextResponse):
        # images and such
        payload = sresponse.body
    else:
        # Prefer the proxy-supplied address, falling back to the peer host.
        client_ip = (self.request.getHeader('x-real-ip')
                     or self.request.client.host)
        payload = self.prepare_response_str(
            sresponse.selector,
            sresponse.headers,
            sresponse.body_as_unicode(),
            sresponse.url,
            client_ip,
        )
        if self.feed_config:
            headers_out = {b"Content-Type": b'text/xml; charset=utf-8'}

    for header_name, header_value in headers_out.items():
        self.request.setHeader(header_name, header_value)

    self.request.write(payload)
    self.request.finish()
def downloadDone(response_str, request, response, feed_config):
    """Post-process a downloaded page and write the result to *request*.

    The download is wrapped with ``buildScrapyResponse`` and passed through
    HttpCompressionMiddleware and DecompressionMiddleware. If the result is a
    TextResponse, the output is either a feed built with ``buildFeed`` (when
    *feed_config* is truthy, also setting an XML Content-Type header) or the
    page returned by ``setBaseAndRemoveScriptsAndMore``.

    NOTE(review): for non-text responses the *response_str* passed in is
    written unchanged, not the decoded ``response.body`` -- confirm intended.
    """
    url = response.request.absoluteURI

    # Single-argument call form prints the same text on Python 2 and Python 3.
    print('Response <%s> ready (%s bytes)' % (url, len(response_str)))

    response = buildScrapyResponse(response, response_str, url)
    response = HttpCompressionMiddleware().process_response(Request(url), response, None)
    response = DecompressionMiddleware().process_response(None, response, None)

    if isinstance(response, TextResponse):
        if feed_config:
            response_str = buildFeed(response, feed_config)
            request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
        else:
            # Second element of the returned pair is not used here.
            response_str, _ = setBaseAndRemoveScriptsAndMore(response, url)

    request.write(response_str)
    request.finish()
    periodical_garbage_collect()
def writeResponse(self, request, response, feed_config,
                  response_str='PolitePol: Local page processing is failed'):
    """Decode *response* and write the rendered result to *request*.

    The response is passed through HttpCompressionMiddleware and
    DecompressionMiddleware. A TextResponse is rendered with
    ``self.prepare_response_str``; for any other response the incoming
    *response_str* is written as-is. The request is finished afterwards.
    """
    http_decoder = HttpCompressionMiddleware()
    decoded = http_decoder.process_response(Request(response.url), response, None)
    archive_decoder = DecompressionMiddleware()
    decoded = archive_decoder.process_response(None, decoded, None)

    is_text = isinstance(decoded, TextResponse)
    if is_text:
        # Prefer the proxy-supplied address, falling back to the peer host.
        client_ip = request.getHeader('x-real-ip') or request.client.host
        response_str = self.prepare_response_str(
            decoded.selector,
            decoded.headers,
            decoded.body_as_unicode(),
            decoded.url,
            feed_config,
            client_ip,
        )

    # The XML content type is only set for text responses rendered with a feed config.
    if is_text and feed_config:
        request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')

    request.write(response_str)
    request.finish()
def setUp(self):
    """Attach a new DecompressionMiddleware and a Spider named 'foo' to the instance."""
    middleware = DecompressionMiddleware()
    self.mw = middleware
    self.spider = Spider('foo')
def setUp(self):
    """Store a DecompressionMiddleware as ``self.mw`` and a 'foo' Spider as ``self.spider``."""
    self.mw = DecompressionMiddleware()
    spider = Spider('foo')
    self.spider = spider