Example #1
0
 def test_spider_breadth(self):
     # Assert BREADTH cross-domain preference.
     # The spider starts from a single link with delay=10 and is crawled with
     # method=web.BREADTH until at least four entries are in v.visited.
     v = web.Spider(links=["http://www.clips.ua.ac.be/"], delay=10)
     while len(v.visited) < 4:
         v.crawl(throttle=0.1, cached=False, method=web.BREADTH)
     # Snapshot the history keys once. Indexing keys() directly only works
     # where it returns a list (Python 2); list() works for views as well.
     # NOTE(review): keys are presumably domain names - confirm against Spider.
     domains = list(v.history.keys())
     # Fail with a regular assertion (instead of an IndexError) when the crawl
     # recorded fewer than three history entries.
     self.assertTrue(len(domains) >= 3)
     first, second, third = domains[:3]
     self.assertTrue(first != second)
     self.assertTrue(first != third)
     self.assertTrue(second != third)
     # Single-argument print in parentheses prints the same text on Python 2.
     print("pattern.web.Spider.crawl(method=BREADTH)")
Example #2
0
 def test_spider_crawl(self):
     # Assert that the domain filter keeps the spider on the given domain.
     domain = "clips.ua.ac.be"
     spider = web.Spider(
         links=["http://www.clips.ua.ac.be/"],
         domains=[domain],
         delay=0.5)
     # Crawl (bypassing the cache) until at least four entries are visited.
     while len(spider.visited) < 4:
         spider.crawl(throttle=0.1, cached=False)
     # Every visited URL has to contain the allowed domain.
     for visited_url in spider.visited:
         self.assertTrue(domain in visited_url)
     # The history must hold exactly one entry.
     # NOTE(review): presumably one entry per domain - confirm against Spider.
     history_size = len(spider.history)
     self.assertTrue(history_size == 1)
     print("pattern.web.Spider.crawl()")
Example #3
0
 def test_spider_delay(self):
     # Assert that consecutive crawls to a single domain honour the delay.
     spider = web.Spider(
         links=["http://www.clips.ua.ac.be/"],
         domains=["clips.ua.ac.be"],
         delay=1.0)
     # First crawl; its result is not inspected.
     spider.crawl()
     started = time.time()
     # Keep calling crawl() until it returns a truthy value.
     # NOTE(review): presumably truthy means a link was visited - confirm.
     crawled = False
     while not crawled:
         crawled = spider.crawl(throttle=0.1, cached=False)
     elapsed = time.time() - started
     # The second successful crawl must take longer than the 1.0s delay.
     self.assertTrue(elapsed > 1.0)
     print("pattern.web.Spider.delay")