class TestRefererMiddleware(TestCase):
    """Scenario-driven check of the ``Referer`` header set on spider output.

    Subclasses customise the run by overriding the class attributes below.
    Each entry of ``scenarii`` is a ``(origin, target, referrer)`` triple:
    the URL of the parent response, the URL of the request produced from it,
    and the ``Referer`` header value expected on that request.
    """

    # Passed as ``meta`` to every request built by ``get_request``.
    req_meta = {}
    # Passed as ``headers`` to every response built by ``get_response``.
    resp_headers = {}
    # Used to build the ``Settings`` object handed to the middleware.
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
    ]

    def setUp(self):
        """Create the spider and the middleware under test."""
        self.spider = Spider('foo')
        mw_settings = Settings(self.settings)
        self.mw = RefererMiddleware(mw_settings)

    def get_request(self, target):
        """Return a request for ``target`` carrying the class-level ``req_meta``."""
        return Request(target, meta=self.req_meta)

    def get_response(self, origin):
        """Return a response for ``origin`` carrying the class-level ``resp_headers``."""
        return Response(origin, headers=self.resp_headers)

    def test(self):
        """Run every scenario and compare the resulting ``Referer`` header."""
        for parent_url, target_url, expected in self.scenarii:
            # Arguments are evaluated left to right, so the response is
            # still built before the request.
            produced = list(self.mw.process_spider_output(
                self.get_response(parent_url),
                [self.get_request(target_url)],
                self.spider,
            ))
            self.assertEqual(produced[0].headers.get('Referer'), expected)
class TestRefererMiddleware(TestCase):
    """Scenario-driven check of the ``Referer`` header set on spider output.

    Subclasses customise the run by overriding the class attributes below.
    Each entry of ``scenarii`` is a ``(origin, target, referrer)`` triple:
    the URL of the parent response, the URL of the request produced from it,
    and the ``Referer`` header value expected on that request.
    """

    # Passed as ``meta`` to every request built by ``get_request``.
    req_meta = {}
    # Passed as ``headers`` to every response built by ``get_response``.
    resp_headers = {}
    # Used to build the ``Settings`` object handed to the middleware.
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
    ]

    def setUp(self):
        """Create the spider and the middleware under test."""
        self.spider = Spider('foo')
        mw_settings = Settings(self.settings)
        self.mw = RefererMiddleware(mw_settings)

    def get_request(self, target):
        """Return a request for ``target`` carrying the class-level ``req_meta``."""
        return Request(target, meta=self.req_meta)

    def get_response(self, origin):
        """Return a response for ``origin`` carrying the class-level ``resp_headers``."""
        return Response(origin, headers=self.resp_headers)

    def test(self):
        """Run every scenario and compare the resulting ``Referer`` header."""
        for parent_url, target_url, expected in self.scenarii:
            # Arguments are evaluated left to right, so the response is
            # still built before the request.
            produced = list(self.mw.process_spider_output(
                self.get_response(parent_url),
                [self.get_request(target_url)],
                self.spider,
            ))
            self.assertEqual(produced[0].headers.get('Referer'), expected)
class TestReferrerOnRedirect(TestRefererMiddleware):
    """``Referer`` header checks for requests that go through redirects.

    Each scenario is a 5-tuple: the parent response URL, the target request
    URL, a sequence of ``(status, location)`` redirections, the ``Referer``
    expected on the initial request, and the ``Referer`` expected on the
    request obtained once the redirections have been processed.
    """

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
    scenarii = [
        (
            'http://scrapytest.org/1',  # parent
            'http://scrapytest.org/2',  # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        """Build the spider plus the referrer and redirect middlewares from one settings object."""
        self.spider = Spider('foo')
        shared_settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(shared_settings)
        self.redirectmw = RedirectMiddleware(shared_settings)

    def test(self):
        """Check the ``Referer`` header before and after the redirections of each scenario."""
        for scenario in self.scenarii:
            parent_url, target_url, hops, expected_initial, expected_final = scenario

            response = self.get_response(parent_url)
            request = self.get_request(target_url)
            emitted = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(emitted[0].headers.get('Referer'), expected_initial)

            # Feed each redirect response through the redirect middleware and
            # hand the request it returns to the referrer middleware's
            # scheduling hook.
            for status_code, location in hops:
                redirect_response = Response(
                    request.url,
                    headers={'Location': location},
                    status=status_code,
                )
                request = self.redirectmw.process_response(request, redirect_response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            # NOTE(review): the final checks are placed after the redirection
            # loop; the collapsed original does not show their nesting level
            # -- confirm against the upstream file.
            assert isinstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), expected_final)
class TestReferrerOnRedirect(TestRefererMiddleware):
    """``Referer`` header checks for requests that go through redirects.

    Each scenario is a 5-tuple: the parent response URL, the target request
    URL, a sequence of ``(status, location)`` redirections, the ``Referer``
    expected on the initial request, and the ``Referer`` expected on the
    request obtained once the redirections have been processed.
    """

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
    scenarii = [
        (
            'http://scrapytest.org/1',  # parent
            'http://scrapytest.org/2',  # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        """Build the spider plus the referrer and redirect middlewares from one settings object."""
        self.spider = Spider('foo')
        shared_settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(shared_settings)
        self.redirectmw = RedirectMiddleware(shared_settings)

    def test(self):
        """Check the ``Referer`` header before and after the redirections of each scenario."""
        for scenario in self.scenarii:
            parent_url, target_url, hops, expected_initial, expected_final = scenario

            response = self.get_response(parent_url)
            request = self.get_request(target_url)
            emitted = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(emitted[0].headers.get('Referer'), expected_initial)

            # Feed each redirect response through the redirect middleware and
            # hand the request it returns to the referrer middleware's
            # scheduling hook.
            for status_code, location in hops:
                redirect_response = Response(
                    request.url,
                    headers={'Location': location},
                    status=status_code,
                )
                request = self.redirectmw.process_response(request, redirect_response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            # NOTE(review): the final checks are placed after the redirection
            # loop; the collapsed original does not show their nesting level
            # -- confirm against the upstream file.
            assert isinstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), expected_final)
class TestRefererMiddleware(TestCase):
    """Checks the ``Referer`` header that ``RefererMiddleware`` sets on spider output."""

    def setUp(self):
        """Create the spider and a middleware built with no arguments."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        """A request passed through the middleware gets the response URL as ``Referer``."""
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # ``assertEqual`` replaces ``assertEquals``, a deprecated alias that
        # was removed in Python 3.12.
        # The expected value is a bytes literal, matching the scenario-based
        # referer test, which expects ``b'http://scrapytest.org'`` for the
        # same URLs. On Python 2 it still equals the native ``str``.
        # NOTE(review): presumably header values come back as bytes on
        # Python 3 -- confirm against the Scrapy version in use.
        self.assertEqual(out[0].headers.get('Referer'), b'http://scrapytest.org')
class TestRefererMiddleware(TestCase):
    """Checks the ``Referer`` header that ``RefererMiddleware`` sets on spider output."""

    def setUp(self):
        """Create the spider and a middleware built with no arguments."""
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        """A request passed through the middleware gets the response URL as ``Referer``."""
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # ``assertEqual`` replaces ``assertEquals``, a deprecated alias that
        # was removed in Python 3.12.
        # The expected value is a bytes literal, matching the scenario-based
        # referer test, which expects ``b'http://scrapytest.org'`` for the
        # same URLs. On Python 2 it still equals the native ``str``.
        # NOTE(review): presumably header values come back as bytes on
        # Python 3 -- confirm against the Scrapy version in use.
        self.assertEqual(out[0].headers.get('Referer'), b'http://scrapytest.org')