def test_crawler(self):
    # all of these (src, dst) pairs should be treated equivalently by the
    # crawler: a trailing slash on either URL must not change the result
    url_pairs = (
        ('http://testserver/', 'http://testserver/'),
        ('http://testserver/', 'http://testserver'),
        ('http://testserver', 'http://testserver/'),
        ('http://testserver', 'http://testserver'),
    )
    for src, dst in url_pairs:
        headers, content, urls = crawl(src, dst, 1)
        self.assertEqual(urls, [
            'http://testserver/1/',
            'http://testserver/2/',
            'http://testserver/?page=3',
        ])

    # test a sub-page
    headers, content, urls = crawl('http://testserver/', 'http://testserver/1/', 1)
    self.assertEqual(urls, [
        'http://testserver/1/1/',
        'http://testserver/1/2/',
        'http://testserver/1/?page=3',
    ])

    # test a deeper sub-page!
    headers, content, urls = crawl('http://testserver/1/', 'http://testserver/1/2/', 2)
    self.assertEqual(urls, [
        'http://testserver/1/2/1/',
        'http://testserver/1/2/2/',
        'http://testserver/1/2/?page=3',
    ])
def test_crawling_500(self):
    # a server error yields no links to follow, only the error status
    headers, content, urls = crawl('http://testserver/', 'http://testserver/500/', 1)
    self.assertEqual(urls, [])
    self.assertEqual(headers['status'], '500')
def test_crawling_404(self):
    # a missing page likewise yields no links, only the error status
    headers, content, urls = crawl('http://testserver/', 'http://testserver/404/', 1)
    self.assertEqual(urls, [])
    self.assertEqual(headers['status'], '404')
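
# A minimal, standalone sketch of the crawl() contract these tests exercise,
# not the module's real crawl(). It assumes the signature
# crawl(base_url, url, depth) -> (headers, content, urls), where headers
# carries 'status' as a string (httplib2-style), content is the page body,
# and urls are the same-host links found on the page (empty on an error
# response). The depth argument, presumably how far to recurse into the
# discovered links, is accepted but elided here, since the assertions above
# only inspect the immediate link list.
from html.parser import HTMLParser
from urllib.error import HTTPError
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen


class _LinkParser(HTMLParser):
    """Collect href targets from anchor tags."""
    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value:
                    self.hrefs.append(value)


def crawl(base_url, url, depth):
    """Fetch url and return (headers, content, urls)."""
    try:
        response = urlopen(url)
        status = response.status
        content = response.read().decode('utf-8', 'replace')
    except HTTPError as e:
        # On a 4xx/5xx response there are no links to follow.
        return {'status': str(e.code)}, e.read().decode('utf-8', 'replace'), []
    parser = _LinkParser()
    parser.feed(content)
    # Resolve relative links against the fetched page, then keep only
    # links on the same host as base_url.
    base_host = urlparse(base_url).netloc
    urls = [urljoin(url, href) for href in parser.hrefs]
    urls = [u for u in urls if urlparse(u).netloc == base_host]
    return {'status': str(status)}, content, urls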