コード例 #1
0
 def setUp(self):
     self.mw = MetaRefreshRedirect(get_engine())
コード例 #2
0
 def setUp(self):
     self.mw = MetaRefreshRedirect(get_engine())
コード例 #3
0
class MetaRefreshRedirectTest(unittest.TestCase):
    def setUp(self):
        self.mw = MetaRefreshRedirect(get_engine())

    def _body(self, interval=5, url='http://example.org/newpage'):
        return '''<html><head><meta http-equiv="refresh" content="%s;url=%s"/></head></html>''' % (interval, url)

    def test_priority_adjust(self):
        req = Request('http://a.com')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)
        self.assertTrue(req2.priority > req.priority)

    def test_meta_refresh(self):
        req = Request(url='http://example.org')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, 'http://example.org/newpage')

    def test_meta_refresh_with_high_interval(self):
        # meta-refresh with high intervals don't trigger redirects
        req = Request(url='http://example.org')
        rsp = HtmlResponse(url='http://example.org', body=self._body(interval=1000), request=req)
        rsp2 = self.mw.process_response(rsp)
        self.assertIs(rsp, rsp2)

    def test_meta_refresh_trough_posted_request(self):
        req = Request(url='http://example.org', method='POST', body='test',
                      headers={'Content-Type': 'text/plain', 'Content-length': '4'})
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)

        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, 'http://example.org/newpage')
        self.assertEqual(req2.method, 'GET')
        self.assertNotIn('Content-Type', req2.headers,
            'Content-Type header must not be present in redirected request')
        self.assertNotIn('Content-Length', req2.headers,
            'Content-Length header must not be present in redirected request')
        self.assertNot(req2.body,
            'Redirected body must be empty, not `%s`' % req2.body)

    def test_max_redirect_times(self):
        self.mw.max_redirect_times = 1
        req = Request('http://test.org/max')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)

        req = self.mw.process_response(rsp)
        self.assertIsInstance(req, Request)
        self.assertEqual(len(req.history), 1)
        rsp.request = req
        self.assertIsNone(self.mw.process_response(rsp))

    def test_redirect_urls(self):
        req1 = Request('http://test.org/first')
        rsp1 = HtmlResponse(req1.url, body=self._body(url='/redirected'), request=req1)
        req2 = self.mw.process_response(rsp1)
        self.assertIsInstance(req2, Request)
        rsp2 = HtmlResponse(req2.url, body=self._body(url='/redirected2'), request=req2)
        req3 = self.mw.process_response(rsp2)
        self.assertIsInstance(req3, Request)
        self.assertEqual(req2.url, 'http://test.org/redirected')
        self.assertListEqual(req2.history, ['http://test.org/first'])
        self.assertEqual(req3.url, 'http://test.org/redirected2')
        self.assertListEqual(req3.history, ['http://test.org/first', 'http://test.org/redirected'])
コード例 #4
0
class MetaRefreshRedirectTest(unittest.TestCase):
    def setUp(self):
        self.mw = MetaRefreshRedirect(get_engine())

    def _body(self, interval=5, url='http://example.org/newpage'):
        return '''<html><head><meta http-equiv="refresh" content="%s;url=%s"/></head></html>''' % (
            interval, url)

    def test_priority_adjust(self):
        req = Request('http://a.com')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)
        self.assertTrue(req2.priority > req.priority)

    def test_meta_refresh(self):
        req = Request(url='http://example.org')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, 'http://example.org/newpage')

    def test_meta_refresh_with_high_interval(self):
        # meta-refresh with high intervals don't trigger redirects
        req = Request(url='http://example.org')
        rsp = HtmlResponse(url='http://example.org',
                           body=self._body(interval=1000),
                           request=req)
        rsp2 = self.mw.process_response(rsp)
        self.assertIs(rsp, rsp2)

    def test_meta_refresh_trough_posted_request(self):
        req = Request(url='http://example.org',
                      method='POST',
                      body='test',
                      headers={
                          'Content-Type': 'text/plain',
                          'Content-length': '4'
                      })
        rsp = HtmlResponse(req.url, body=self._body(), request=req)
        req2 = self.mw.process_response(rsp)

        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, 'http://example.org/newpage')
        self.assertEqual(req2.method, 'GET')
        self.assertNotIn(
            'Content-Type', req2.headers,
            'Content-Type header must not be present in redirected request')
        self.assertNotIn(
            'Content-Length', req2.headers,
            'Content-Length header must not be present in redirected request')
        self.assertNot(req2.body,
                       'Redirected body must be empty, not `%s`' % req2.body)

    def test_max_redirect_times(self):
        self.mw.max_redirect_times = 1
        req = Request('http://test.org/max')
        rsp = HtmlResponse(req.url, body=self._body(), request=req)

        req = self.mw.process_response(rsp)
        self.assertIsInstance(req, Request)
        self.assertEqual(len(req.history), 1)
        rsp.request = req
        self.assertIsNone(self.mw.process_response(rsp))

    def test_redirect_urls(self):
        req1 = Request('http://test.org/first')
        rsp1 = HtmlResponse(req1.url,
                            body=self._body(url='/redirected'),
                            request=req1)
        req2 = self.mw.process_response(rsp1)
        self.assertIsInstance(req2, Request)
        rsp2 = HtmlResponse(req2.url,
                            body=self._body(url='/redirected2'),
                            request=req2)
        req3 = self.mw.process_response(rsp2)
        self.assertIsInstance(req3, Request)
        self.assertEqual(req2.url, 'http://test.org/redirected')
        self.assertListEqual(req2.history, ['http://test.org/first'])
        self.assertEqual(req3.url, 'http://test.org/redirected2')
        self.assertListEqual(
            req3.history,
            ['http://test.org/first', 'http://test.org/redirected'])