def test_can_register_spiders(self):
    """Every spider passed to ``Silk.register`` shows up in ``Silk.spiders``."""
    spiders = (Spider(), Spider())
    silk = Silk(self.io_loop)

    # Register everything first, then verify, mirroring the order in which
    # the checks were originally performed.
    for spider in spiders:
        silk.register(spider)

    for spider in spiders:
        self.assertIn(spider, silk.spiders)
def test_spider_prints_urls_without_callback(self):
    """Crawl a dmoz.org page with a spider that has no callback.

    The spider is built with one list of allow patterns, one list of deny
    patterns and ``callback=None``; the ``Silk`` instance is restricted to
    ``www.dmoz.org`` and created with ``fail_silent=False``.

    The test makes no explicit assertions: it only checks that the crawl
    finishes by invoking ``self.stop`` so that ``self.wait()`` returns.
    NOTE(review): the target is a public URL, so this presumably needs
    network access -- confirm, or point it at a local fixture.
    """
    allow_regex = ['Python', 'Ruby']
    deny_regex = ['Deutsch']
    spider = Spider(allow_regex, deny_regex, callback=None)

    silk = Silk(
        self.io_loop,
        allowed_domains=['www.dmoz.org'],
        fail_silent=False,
    )
    silk.register(spider)
    silk.crawl(
        'http://www.dmoz.org/Computers/Programming/Languages/Python/Books/',
        self.stop,
    )

    # The value handed to ``self.stop`` is not inspected, so it is not bound
    # to a local; the call is only needed to run the loop until completion.
    self.wait()
def test__crawl(self):
    """Crawl ``index.html`` from the local test server with a default spider.

    ``self.stop`` is passed as the crawl's completion callback, and
    ``self.wait()`` then runs the IO loop until that callback fires. Without
    the wait the crawl is only scheduled and the test returns before any
    request is made, so it would pass without exercising anything.
    """
    spider = Spider()
    # NOTE(review): ``allowed_domains=['']`` is kept as-is; presumably it is
    # meant to let the local URL through -- confirm against Silk's filtering.
    s = Silk(self.io_loop, allowed_domains=[''])
    s.register(spider)
    s.crawl(LOCAL_URL%(LOCAL_PORT,'index.html'), self.stop)

    # Drive the loop until the crawl reports completion (same pattern as the
    # other crawl test in this file).
    self.wait()