def setUp(self):
    """Build the Redirect middleware under test and store it on ``self.mw``."""
    engine = get_engine()
    self.mw = Redirect(engine)
class RedirectTest(unittest.TestCase):
    """Tests for how the Redirect middleware processes 3XX responses."""

    def setUp(self):
        """Create a fresh Redirect middleware for every test."""
        self.mw = Redirect(get_engine())

    def _assert_ignored_without_location(self, resp):
        """Drop the Location header from *resp* and check it is passed through.

        A 3XX response that carries no Location header should be ignored:
        ``process_response`` must hand back the very same response object.
        """
        del resp.headers['Location']
        self.assertIs(self.mw.process_response(resp), resp)

    def test_priority_adjust(self):
        """The redirected request has a higher priority than the original."""
        req = Request('http://a.com')
        resp = Response('http://a.com',
                        headers={'Location': 'http://a.com/redirected'},
                        status=301, request=req)
        req2 = self.mw.process_response(resp)
        # Use assertGreater instead of a bare ``assert``: it is not stripped
        # under ``python -O`` and it reports both values on failure.
        self.assertGreater(req2.priority, req.priority)

    def test_redirect_301(self):
        """A 301 redirect targets the Location URL and keeps the method."""
        url = 'http://www.example.com/301'
        url2 = 'http://www.example.com/redirected'
        for method in ('GET', 'POST', 'HEAD'):
            req = Request(url, method=method)
            resp = Response(url, headers={'Location': url2}, status=301,
                            request=req)
            req2 = self.mw.process_response(resp)
            self.assertIsInstance(req2, Request)
            self.assertEqual(req2.url, url2)
            self.assertEqual(req2.method, method)
            self._assert_ignored_without_location(resp)

    def test_redirect_302(self):
        """A 302 redirect of a POST becomes a body-less GET."""
        url = 'http://www.example.com/302'
        url2 = 'http://www.example.com/redirected2'
        # NOTE(review): the request header is spelled 'Content-length' while
        # the assertion below checks 'Content-Length' - presumably header
        # lookup is case-insensitive; confirm against the headers class.
        req = Request(url, method='POST', body='test',
                      headers={'Content-Type': 'text/plain',
                               'Content-length': '4'})
        resp = Response(url, headers={'Location': url2}, status=302,
                        request=req)
        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, url2)
        self.assertEqual(req2.method, 'GET')
        self.assertNotIn(
            'Content-Type', req2.headers,
            'Content-Type header must not be present in redirected request')
        self.assertNotIn(
            'Content-Length', req2.headers,
            'Content-Length header must not be present in redirected request')
        self.assertEqual(
            req2.body, '',
            'Redirected body must be empty, not `%s`' % req2.body)
        self._assert_ignored_without_location(resp)

    def test_redirect_302_head(self):
        """A 302 redirect of a HEAD request stays a HEAD request."""
        url = 'http://www.example.com/302'
        url2 = 'http://www.example.com/redirected2'
        req = Request(url, method='HEAD')
        resp = Response(url, headers={'Location': url2}, status=302,
                        request=req)
        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, url2)
        self.assertEqual(req2.method, 'HEAD')
        self._assert_ignored_without_location(resp)

    def test_max_redirect_times(self):
        """Once the redirect limit is reached, ``None`` is returned."""
        self.mw.max_redirect_times = 1
        req = Request('http://crawlmitest.org/302')
        resp = Response('http://crawlmitest.org/302',
                        headers={'Location': '/redirected'},
                        status=302, request=req)
        # The first redirect is within the limit and yields a new request.
        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertListEqual(req2.history, ['http://crawlmitest.org/302'])
        # A second redirect on the already-redirected request is refused.
        resp2 = Response('http://crawlmitest.org/302',
                         headers={'Location': '/redirected'},
                         status=302, request=req2)
        self.assertIsNone(self.mw.process_response(resp2))

    def test_redirect_urls(self):
        """Relative Locations become absolute URLs; history accumulates."""
        req1 = Request('http://crawlmitest.org/first')
        resp1 = Response('http://crawlmitest.org/first',
                         headers={'Location': '/redirected'},
                         status=302, request=req1)
        req2 = self.mw.process_response(resp1)
        resp2 = Response('http://crawlmitest.org/redirected',
                         headers={'Location': '/redirected2'},
                         status=302, request=req2)
        req3 = self.mw.process_response(resp2)

        self.assertEqual(req2.url, 'http://crawlmitest.org/redirected')
        self.assertListEqual(req2.history, ['http://crawlmitest.org/first'])
        self.assertEqual(req3.url, 'http://crawlmitest.org/redirected2')
        self.assertListEqual(req3.history,
                             ['http://crawlmitest.org/first',
                              'http://crawlmitest.org/redirected'])
class RedirectTest(unittest.TestCase):
    """Exercise the Redirect middleware's ``process_response`` on 3XX replies."""

    def setUp(self):
        """Instantiate the middleware under test before each test method."""
        self.mw = Redirect(get_engine())

    def test_priority_adjust(self):
        """Following a redirect must raise the request's priority."""
        req = Request('http://a.com')
        resp = Response(
            'http://a.com',
            headers={'Location': 'http://a.com/redirected'},
            status=301,
            request=req)
        req2 = self.mw.process_response(resp)
        # A bare ``assert`` is removed under ``python -O`` and gives no
        # diagnostic; assertGreater always runs and shows both operands.
        self.assertGreater(req2.priority, req.priority)

    def test_redirect_301(self):
        """301 responses redirect to Location without changing the method."""

        def _test(method):
            url = 'http://www.example.com/301'
            url2 = 'http://www.example.com/redirected'
            req = Request(url, method=method)
            resp = Response(
                url, headers={'Location': url2}, status=301, request=req)

            req2 = self.mw.process_response(resp)
            self.assertIsInstance(req2, Request)
            self.assertEqual(req2.url, url2)
            self.assertEqual(req2.method, method)

            # A 3XX response without a Location header should be ignored,
            # i.e. returned unchanged.
            del resp.headers['Location']
            self.assertIs(self.mw.process_response(resp), resp)

        _test('GET')
        _test('POST')
        _test('HEAD')

    def test_redirect_302(self):
        """302 on a POST yields a GET with no body and no content headers."""
        url = 'http://www.example.com/302'
        url2 = 'http://www.example.com/redirected2'
        req = Request(url, method='POST', body='test', headers={
            'Content-Type': 'text/plain',
            'Content-length': '4'
        })
        resp = Response(
            url, headers={'Location': url2}, status=302, request=req)

        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, url2)
        self.assertEqual(req2.method, 'GET')
        self.assertNotIn(
            'Content-Type', req2.headers,
            'Content-Type header must not be present in redirected request')
        self.assertNotIn(
            'Content-Length', req2.headers,
            'Content-Length header must not be present in redirected request')
        self.assertEqual(req2.body, '',
                         'Redirected body must be empty, not `%s`' % req2.body)

        # A 3XX response without a Location header should be ignored,
        # i.e. returned unchanged.
        del resp.headers['Location']
        self.assertIs(self.mw.process_response(resp), resp)

    def test_redirect_302_head(self):
        """302 on a HEAD request keeps the HEAD method."""
        url = 'http://www.example.com/302'
        url2 = 'http://www.example.com/redirected2'
        req = Request(url, method='HEAD')
        resp = Response(
            url, headers={'Location': url2}, status=302, request=req)

        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertEqual(req2.url, url2)
        self.assertEqual(req2.method, 'HEAD')

        # A 3XX response without a Location header should be ignored,
        # i.e. returned unchanged.
        del resp.headers['Location']
        self.assertIs(self.mw.process_response(resp), resp)

    def test_max_redirect_times(self):
        """With a limit of one, the second redirect in a chain gives None."""
        self.mw.max_redirect_times = 1
        req = Request('http://crawlmitest.org/302')
        resp = Response(
            'http://crawlmitest.org/302',
            headers={'Location': '/redirected'},
            status=302,
            request=req)

        req2 = self.mw.process_response(resp)
        self.assertIsInstance(req2, Request)
        self.assertListEqual(req2.history, ['http://crawlmitest.org/302'])

        # req2 already carries one history entry, so redirecting it again
        # exceeds the configured limit.
        resp2 = Response(
            'http://crawlmitest.org/302',
            headers={'Location': '/redirected'},
            status=302,
            request=req2)
        self.assertIsNone(self.mw.process_response(resp2))

    def test_redirect_urls(self):
        """Each hop resolves its relative Location and extends the history."""
        req1 = Request('http://crawlmitest.org/first')
        resp1 = Response(
            'http://crawlmitest.org/first',
            headers={'Location': '/redirected'},
            status=302,
            request=req1)
        req2 = self.mw.process_response(resp1)

        resp2 = Response(
            'http://crawlmitest.org/redirected',
            headers={'Location': '/redirected2'},
            status=302,
            request=req2)
        req3 = self.mw.process_response(resp2)

        self.assertEqual(req2.url, 'http://crawlmitest.org/redirected')
        self.assertListEqual(req2.history, ['http://crawlmitest.org/first'])
        self.assertEqual(req3.url, 'http://crawlmitest.org/redirected2')
        self.assertListEqual(req3.history, [
            'http://crawlmitest.org/first',
            'http://crawlmitest.org/redirected'
        ])