Example #1
 def test_crawler(self):
     # the crawler should treat each of these (src, dst) pairs as equivalent
     url_pairs = (
         ('http://testserver/', 'http://testserver/'),
         ('http://testserver/', 'http://testserver'),
         ('http://testserver', 'http://testserver/'),
         ('http://testserver', 'http://testserver'),
     )
     for (src, dst) in url_pairs:
         headers, content, urls = crawl(src, dst, 1)
         self.assertEqual(urls, [
             'http://testserver/1/',
             'http://testserver/2/',
             'http://testserver/?page=3',
         ])
     
     # test a sub-page
     headers, content, urls = crawl('http://testserver/', 'http://testserver/1/', 1)
     self.assertEqual(urls, [
         'http://testserver/1/1/',
         'http://testserver/1/2/',
         'http://testserver/1/?page=3',
     ])
     
     # test a deeper sub-page!
     headers, content, urls = crawl('http://testserver/1/', 'http://testserver/1/2/', 2)
     self.assertEqual(urls, [
         'http://testserver/1/2/1/',
         'http://testserver/1/2/2/',
         'http://testserver/1/2/?page=3',
     ])
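
The `crawl()` helper itself is not shown on this page. The sketch below is an assumed, minimal stand-in that is merely consistent with the assertions above: it fetches `dst`, collects the anchors on the returned page, resolves them against the fetched URL, and returns httplib2-style headers, the body, and the list of discovered URLs. The `fetch` callable, the `_LinkParser` class, and the handling of `src` and `depth` are illustrative assumptions, not the original API.

from html.parser import HTMLParser
from urllib.parse import urljoin


class _LinkParser(HTMLParser):
    """Collects the href values of anchor tags on a page."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value:
                    self.hrefs.append(value)


def crawl(src, dst, depth, fetch=None):
    # Normalise the trailing slash so that 'http://testserver' and
    # 'http://testserver/' behave identically, as the first test expects.
    base = dst if dst.endswith('/') else dst + '/'
    # `fetch` is assumed to return an httplib2-style (headers, content) pair,
    # where headers['status'] is the status code as a string.
    headers, content = fetch(base)
    if headers.get('status') != '200':
        # error responses (404, 500, ...) yield no URLs to follow
        return headers, content, []
    parser = _LinkParser()
    parser.feed(content)
    # Resolve relative links against the page that was just fetched; in the
    # real crawler, `src` and `depth` would also scope and bound the recursion,
    # which this sketch leaves out.
    urls = [urljoin(base, href) for href in parser.hrefs]
    return headers, content, urls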
Example #2
 def test_crawling_500(self):
     # a crawl into an erroring page should surface the 500 and find no URLs
     headers, content, urls = crawl('http://testserver/', 'http://testserver/500/', 1)
     self.assertEqual(urls, [])
     self.assertEqual(headers['status'], '500')
Example #3
 def test_crawling_404(self):
     # a crawl into a missing page should surface the 404 and find no URLs
     headers, content, urls = crawl('http://testserver/', 'http://testserver/404/', 1)
     self.assertEqual(urls, [])
     self.assertEqual(headers['status'], '404')
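
The last two tests presuppose that the test server exposes endpoints that deliberately return error statuses. A hedged sketch of such a test URLconf is below; the view names and URL patterns are illustrative guesses, not taken from the original project.

from django.http import HttpResponseNotFound, HttpResponseServerError
from django.urls import path


def server_error(request):
    # always fails, so the crawler records a 500 and discovers no links
    return HttpResponseServerError('intentional server error')


def not_found(request):
    # always missing, so the crawler records a 404 and discovers no links
    return HttpResponseNotFound('intentionally missing page')


urlpatterns = [
    path('500/', server_error),
    path('404/', not_found),
]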