def test_response_callable_selector():
    """A response exposes a ``Selector`` and keeps it in ``_cached_selector``."""
    response = Response('http://www.url.com', body=b'<html><body></body></html>')

    # First access builds the selector.
    assert isinstance(response.selector, Selector)

    # A further access must agree with the selector stored on the response.
    assert response._cached_selector == response.selector
def test_spider_no_parse():
    """A spider subclass that does not override ``parse`` raises ``NotImplementedError``."""

    class MyTestSpider(spider.Spider):
        name = 'foo'
        start_urls = ['http://my.url']

    my_spider = MyTestSpider()
    response = Response(my_spider.start_urls[0], body=b'no_body')

    # The raised exception is never inspected, so no ``as`` binding is needed.
    with pytest.raises(NotImplementedError):
        my_spider.parse(response)
def test_with_process_chain(test_spider, middleware, loop, logger):
    """Scraping a response through a one-middleware manager yields the expected item.

    All four arguments are injected by pytest as fixtures (defined outside
    this block).
    """
    request = Request('http://www.url.com', test_spider.parse)
    response = Response('http://www.url.com', body=b'')
    expected_result = {'dummy': 'result'}

    manager = spidermw.SpiderMiddlewareManager(middleware)
    result = loop.run_until_complete(
        manager.scrape_response(
            test_spider.parse, response, request, logger, test_spider))

    # Only the first scraped item is checked.
    assert result[0]['dummy'] == expected_result['dummy']
def test_with_process_chain_failure_without_handler(test_spider, loop, logger):
    """An input-processing error with no exception handler comes back as the result.

    The middleware defines only ``process_spider_input``, which always raises.
    The test expects ``scrape_response`` to return that exception object
    instead of propagating it.

    All arguments are injected by pytest as fixtures (defined outside this
    block).
    """

    class GenericMiddleware:
        def process_spider_input(self, response, spider):
            raise Exception('processing failed')

    request = Request('http://www.url.com', test_spider.parse)
    response = Response('http://www.url.com', body=b'')
    expected_exception_value = 'processing failed'

    manager = spidermw.SpiderMiddlewareManager(GenericMiddleware())
    exception = loop.run_until_complete(
        manager.scrape_response(
            test_spider.parse, response, request, logger, test_spider))

    assert exception.args[0] == expected_exception_value
def test_with_process_chain_failure(test_spider, loop, logger):
    """An input-processing error is replaced by the exception handler's return value.

    The middleware raises in ``process_spider_input`` and returns a string
    from ``process_spider_exception``. The test expects that string to be the
    result of ``scrape_response``.

    All arguments are injected by pytest as fixtures (defined outside this
    block).
    """

    class GenericMiddleware:
        def process_spider_input(self, response, spider):
            raise Exception('processing failed')

        def process_spider_exception(self, response, exception, spider):
            return 'processing failed'

    request = Request('http://www.url.com', test_spider.parse)
    response = Response('http://www.url.com', body=b'')
    expected_exception_value = 'processing failed'

    middleware = GenericMiddleware()
    manager = spidermw.SpiderMiddlewareManager(middleware)

    # The manager must have registered the middleware's exception hook.
    assert middleware.process_spider_exception in manager.methods[
        'process_spider_exception']

    processed_result = loop.run_until_complete(
        manager.scrape_response(
            test_spider.parse, response, request, logger, test_spider))

    assert processed_result == expected_exception_value
def test_response_with_none_str_url():
    """Constructing a ``Response`` with a ``bytes`` URL raises ``TypeError``."""
    # The raised exception is never inspected, so no ``as`` binding is needed.
    with pytest.raises(TypeError):
        Response(b'http://www.url.com')
def test_response_simple_css_selection():
    """A CSS ``::text`` query on the response returns the body text."""
    html = b'<html><body>my body</body></html>'
    response = Response('http://www.url.com', body=html)

    matches = response.css('body::text')
    assert matches.extract_first() == 'my body'
def test_response_simple_xpath_selection():
    """An XPath ``text()`` query on the response returns the body text."""
    html = b'<html><body>my body</body></html>'
    response = Response('http://www.url.com', body=html)

    matches = response.xpath('//body/text()')
    assert matches.extract_first() == 'my body'
def test_response_with_str_body():
    """Constructing a ``Response`` with a ``str`` body raises ``TypeError``."""
    # The raised exception is never inspected, so no ``as`` binding is needed.
    with pytest.raises(TypeError):
        Response('http://www.url.com', body='fails')
def test_response_with_none_body():
    """An explicit ``body=None`` gives a response whose body is empty bytes."""
    response = Response('http://www.url.com', body=None)
    observed_body = response.body
    assert observed_body == b''
def test_response_without_body():
    """Omitting the ``body`` argument gives a response whose body is empty bytes."""
    response = Response('http://www.url.com')
    observed_body = response.body
    assert observed_body == b''
async def downloader(request, logger, spider):
    """Stub downloader: answer a request with a fixed-body ``Response``.

    The response reuses ``request.url`` and always carries ``b'my_body'``.
    ``logger`` and ``spider`` are accepted but not used.
    """
    return Response(request.url, body=b'my_body')