コード例 #1
0
 def test_crawler_breadth(self):
     """Check that a BREADTH crawl yields distinct leading history keys.

     Starts a crawler on a single seed link, crawls with
     ``method=web.BREADTH`` until at least four URLs have been visited,
     and then compares the first three keys of ``Crawler.history``
     pairwise.

     Presumably performs live HTTP requests (``cached=False``) - confirm
     before running in an offline environment.
     """
     # Assert BREADTH cross-domain preference.
     v = web.Crawler(links=["http://www.clips.ua.ac.be/"], delay=10)
     while len(v.visited) < 4:
         v.crawl(throttle=0.1, cached=False, method=web.BREADTH)
     # Snapshot the keys once: on Python 3, keys() returns a view that
     # cannot be indexed, and one snapshot keeps the comparisons consistent.
     # NOTE(review): if history is a plain dict, its keys are distinct by
     # construction, so these checks mainly require that at least three
     # keys exist (IndexError otherwise) - confirm the intent.
     keys = list(v.history.keys())
     self.assertTrue(keys[0] != keys[1])
     self.assertTrue(keys[0] != keys[2])
     self.assertTrue(keys[1] != keys[2])
     # Parenthesized form prints the same text on Python 2 and Python 3.
     print("pattern.web.Crawler.crawl(method=BREADTH)")
コード例 #2
0
 def test_crawler_crawl(self):
     """Check that a domain-filtered crawl only visits URLs of that domain.

     Crawls from the seed link with ``domains=["clips.ua.ac.be"]`` until
     at least four URLs have been visited, then asserts that every
     visited URL contains the domain and that ``Crawler.history`` holds
     exactly one entry.

     Presumably performs live HTTP requests (``cached=False``) - confirm
     before running in an offline environment.
     """
     # Assert domain filter.
     v = web.Crawler(links=["http://www.clips.ua.ac.be/"], domains=["clips.ua.ac.be"], delay=0.5)
     while len(v.visited) < 4:
         v.crawl(throttle=0.1, cached=False)
     for url in v.visited:
         self.assertTrue("clips.ua.ac.be" in url)
     self.assertTrue(len(v.history) == 1)
     # Parenthesized form prints the same text on Python 2 and Python 3.
     print("pattern.web.Crawler.crawl()")
コード例 #3
0
ファイル: test_web.py プロジェクト: pri-k/pattern
 def test_crawler_crawl(self):
     """Check that a domain-filtered crawl stays on nodebox.net.

     Crawls from the seed link until at least four URLs have been
     visited, then asserts that every visited URL contains the domain
     and that ``Crawler.history`` holds exactly two entries.
     """
     domain = "nodebox.net"
     crawler = web.Crawler(links=["http://nodebox.net/"], domains=[domain], delay=0.5)
     # Keep crawling until at least four URLs have been visited.
     while len(crawler.visited) < 4:
         crawler.crawl(throttle=0.1, cached=False)
     # Assert domain filter.
     for visited_url in crawler.visited:
         self.assertTrue(domain in visited_url)
     history_size = len(crawler.history)
     self.assertTrue(history_size == 2)
     print("pattern.web.Crawler.crawl()")
コード例 #4
0
 def test_crawler_delay(self):
     """Check that repeated crawls to one domain are spaced by the delay.

     Performs one initial ``crawl()``, then polls ``crawl()`` until it
     returns a truthy value and asserts that the polling took longer
     than one second (the crawler is created with ``delay=1.0``).
     """
     # Assert delay for several crawls to a single domain.
     v = web.Crawler(links=["http://www.clips.ua.ac.be/"], domains=["clips.ua.ac.be"], delay=1.0)
     v.crawl()
     # NOTE(review): the timer starts only after the first crawl() has
     # returned; if the crawler measures its delay from an earlier point,
     # the wait observed here could be shorter than the configured 1.0s -
     # confirm that the threshold below has enough margin.
     t = time.time()
     while not v.crawl(throttle=0.1, cached=False):
         pass
     t = time.time() - t
     self.assertTrue(t > 1.0)
     # Parenthesized form prints the same text on Python 2 and Python 3.
     print("pattern.web.Crawler.delay")
コード例 #5
0
ファイル: test_web.py プロジェクト: pri-k/pattern
 def test_crawler_delay(self):
     """Check that repeated crawls to nodebox.net are spaced by the delay.

     After one initial ``crawl()``, polls ``crawl()`` until it returns a
     truthy value and asserts that the polling took longer than one
     second (the crawler is created with ``delay=1.2``).
     """
     # Assert delay for several crawls to a single domain.
     crawler = web.Crawler(links=["http://nodebox.net/"], domains=["nodebox.net"], delay=1.2)
     crawler.crawl()
     started = time.time()
     # Poll until crawl() reports a truthy result.
     while True:
         if crawler.crawl(throttle=0.1, cached=False):
             break
     elapsed = time.time() - started
     self.assertTrue(elapsed > 1.0)
     print("pattern.web.Crawler.delay")