Ejemplo n.º 1
0
class TestRefererMiddleware(TestCase):

    req_meta = {}
    resp_headers = {}
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/',
         b'http://scrapytest.org'),
    ]

    def setUp(self):
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.mw = RefererMiddleware(settings)

    def get_request(self, target):
        return Request(target, meta=self.req_meta)

    def get_response(self, origin):
        return Response(origin, headers=self.resp_headers)

    def test(self):

        for origin, target, referrer in self.scenarii:
            response = self.get_response(origin)
            request = self.get_request(target)
            out = list(
                self.mw.process_spider_output(response, [request],
                                              self.spider))
            self.assertEqual(out[0].headers.get('Referer'), referrer)
class TestRefererMiddleware(TestCase):

    req_meta = {}
    resp_headers = {}
    settings = {}
    scenarii = [
        ('http://scrapytest.org', 'http://scrapytest.org/',  b'http://scrapytest.org'),
    ]

    def setUp(self):
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.mw = RefererMiddleware(settings)

    def get_request(self, target):
        return Request(target, meta=self.req_meta)

    def get_response(self, origin):
        return Response(origin, headers=self.resp_headers)

    def test(self):

        for origin, target, referrer in self.scenarii:
            response = self.get_response(origin)
            request = self.get_request(target)
            out = list(self.mw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), referrer)
Ejemplo n.º 3
0
class TestReferrerOnRedirect(TestRefererMiddleware):

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
    scenarii = [
        (
            'http://scrapytest.org/1',      # parent
            'http://scrapytest.org/2',      # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1',  # expected initial referer
            b'http://scrapytest.org/1',  # expected referer for the redirection request
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (
            'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(settings)
        self.redirectmw = RedirectMiddleware(settings)

    def test(self):

        for parent, target, redirections, init_referrer, final_referrer in self.scenarii:
            response = self.get_response(parent)
            request = self.get_request(target)

            out = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), init_referrer)

            for status, url in redirections:
                response = Response(request.url, headers={'Location': url}, status=status)
                request = self.redirectmw.process_response(request, response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            assert isinstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), final_referrer)
class TestReferrerOnRedirect(TestRefererMiddleware):

    settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
    scenarii = [
        (   'http://scrapytest.org/1',      # parent
            'http://scrapytest.org/2',      # target
            (
                # redirections: code, URL
                (301, 'http://scrapytest.org/3'),
                (301, 'http://scrapytest.org/4'),
            ),
            b'http://scrapytest.org/1', # expected initial referer
            b'http://scrapytest.org/1', # expected referer for the redirection request
        ),
        (   'https://scrapytest.org/1',
            'https://scrapytest.org/2',
            (
                # redirecting to non-secure URL
                (301, 'http://scrapytest.org/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
        (   'https://scrapytest.org/1',
            'https://scrapytest.com/2',
            (
                # redirecting to non-secure URL: different origin
                (301, 'http://scrapytest.com/3'),
            ),
            b'https://scrapytest.org/1',
            b'https://scrapytest.org/1',
        ),
    ]

    def setUp(self):
        self.spider = Spider('foo')
        settings = Settings(self.settings)
        self.referrermw = RefererMiddleware(settings)
        self.redirectmw = RedirectMiddleware(settings)

    def test(self):

        for parent, target, redirections, init_referrer, final_referrer in self.scenarii:
            response = self.get_response(parent)
            request = self.get_request(target)

            out = list(self.referrermw.process_spider_output(response, [request], self.spider))
            self.assertEqual(out[0].headers.get('Referer'), init_referrer)

            for status, url in redirections:
                response = Response(request.url, headers={'Location': url}, status=status)
                request = self.redirectmw.process_response(request, response, self.spider)
                self.referrermw.request_scheduled(request, self.spider)

            assert isinstance(request, Request)
            self.assertEqual(request.headers.get('Referer'), final_referrer)
Ejemplo n.º 5
0
class TestRefererMiddleware(TestCase):
    def setUp(self):
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        self.assertEquals(out[0].headers.get('Referer'),
                          'http://scrapytest.org')
class TestRefererMiddleware(TestCase):

    def setUp(self):
        self.spider = Spider('foo')
        self.mw = RefererMiddleware()

    def test_process_spider_output(self):
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/')]

        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        self.assertEquals(out[0].headers.get('Referer'),
                          'http://scrapytest.org')