def test_spider_breadth(self):
    # Assert BREADTH cross-domain preference: crawl with method=web.BREADTH
    # until four pages have been visited, then check that the first three
    # keys of the spider's history (presumably one per domain - confirm
    # against web.Spider) are pairwise different.
    v = web.Spider(links=["http://www.clips.ua.ac.be/"], delay=10)
    while len(v.visited) < 4:
        v.crawl(throttle=0.1, cached=False, method=web.BREADTH)
    # Snapshot the keys once. list() keeps the indexing below valid even
    # where keys() returns a view instead of a list.
    keys = list(v.history.keys())
    # Report too few history entries as a regular assertion failure rather
    # than as an IndexError raised by the indexing below.
    self.assertTrue(len(keys) >= 3)
    self.assertTrue(keys[0] != keys[1])
    self.assertTrue(keys[0] != keys[2])
    self.assertTrue(keys[1] != keys[2])
    # Single parenthesised string: prints identically as a Python 2 print
    # statement and as a print() call.
    print("pattern.web.Spider.crawl(method=BREADTH)")
def test_spider_crawl(self):
    # Assert domain filter: with domains restricted to a single entry,
    # every visited URL must contain that domain and the history must hold
    # exactly one entry.
    start_url = "http://www.clips.ua.ac.be/"
    allowed = "clips.ua.ac.be"
    v = web.Spider(links=[start_url], domains=[allowed], delay=0.5)
    # Keep crawling until four pages have been visited.
    while True:
        if len(v.visited) >= 4:
            break
        v.crawl(throttle=0.1, cached=False)
    for url in v.visited:
        self.assertTrue(allowed in url)
    self.assertTrue(len(v.history) == 1)
    print("pattern.web.Spider.crawl()")
def test_spider_delay(self):
    # Assert delay for several crawls to a single domain: after an initial
    # crawl, the time until crawl() next returns a truthy value must exceed
    # the delay of 1.0 passed to the spider.
    spider = web.Spider(
        links=["http://www.clips.ua.ac.be/"],
        domains=["clips.ua.ac.be"],
        delay=1.0,
    )
    spider.crawl()
    started = time.time()
    # Poll until crawl() reports a truthy result.
    while True:
        if spider.crawl(throttle=0.1, cached=False):
            break
    elapsed = time.time() - started
    self.assertTrue(elapsed > 1.0)
    print("pattern.web.Spider.delay")