Ejemplo n.º 1
0
 def test_on_interrupt_is_not_called_without_interrupt(self):
     # A handler registered through on_interrupt() must not be invoked,
     # either at registration time or by a crawl that is not interrupted.
     crawler = DomainCrawler()
     interrupt_handler = CallableMock()
     crawler.on_interrupt(interrupt_handler)

     calls_after_registration = interrupt_handler.call_count()
     self.assertEqual(calls_after_registration, 0)

     crawler.crawl("tiggers")

     calls_after_crawl = interrupt_handler.call_count()
     self.assertEqual(calls_after_crawl, 0)
Ejemplo n.º 2
0
 def test_calls_callback_on_finish(self):
     # A handler registered through on_finish() is untouched by the
     # registration itself and is invoked exactly once by a crawl.
     domain_crawler = DomainCrawler()
     finish_handler = CallableMock()
     domain_crawler.on_finish(finish_handler)

     calls_before_crawl = finish_handler.call_count()
     self.assertEqual(calls_before_crawl, 0)

     domain_crawler.crawl("tiggers")

     calls_after_crawl = finish_handler.call_count()
     self.assertEqual(calls_after_crawl, 1)
Ejemplo n.º 3
0
 def test_calls_callback_on_interrupt(self):
     # Verifies that a handler registered through on_interrupt() is invoked
     # once when the crawl is aborted by a KeyboardInterrupt, and that the
     # interrupt still propagates to the caller.
     #
     # NOTE(review): parser.parse.raises(...) presumably configures the
     # parser mock to raise the given exception when called - confirm
     # against the mock helper.
     parser.parse.raises(KeyboardInterrupt())
     my_crawler = DomainCrawler()
     callback = CallableMock()
     my_crawler.on_interrupt(callback)
     self.assertEqual(callback.call_count(), 0)
     with self.assertRaises(KeyboardInterrupt):
         my_crawler.crawl("tiggers")
     # This check must live outside the assertRaises block: any statement
     # placed after the raising call inside the block is skipped, so the
     # handler invocation would never actually be verified.
     self.assertEqual(callback.call_count(), 1)
Ejemplo n.º 4
0
class DomainCrawlerMethodTests(DomainCrawlerTests):
    """Tests for crawl() on a DomainCrawler built with a fixed start URL.

    Most tests set ``crawl_result.links`` and then check how many times
    ``parser.parse`` was called by a single ``crawl()``.
    """

    start_mock = None
    callback = None

    def __reset_threadmocks(self):
        # Put the callback hooks and the work_service mocks back into a
        # known state before each test.
        def _noop(*args, **kwargs):
            return None

        def _forward_to_current_callback(*args, **kwargs):
            # Looks up self.callback at call time, so a test can replace
            # the attribute after setUp and still have it picked up.
            return self.callback()

        def _run_immediately(cb):
            # Invokes whatever request_work is handed right away.
            return cb()

        self.callback = _noop
        self.start_mock = CallableMock(callback=_forward_to_current_callback)
        work_service.request_work.reset(callback=_run_immediately)
        work_service.active_count.reset(returns=0)

    def setUp(self):
        # Base fixture first, then a fresh crawler and freshly reset mocks.
        DomainCrawlerTests.setUp(self)
        start_url = "http://winnie_the_pooh"
        self.crawler = DomainCrawler(start_url)
        parser.parse.reset(returns=crawl_result)
        self.__reset_threadmocks()

    def test_crawl_calls_crawler(self):
        # A plain crawl() results in exactly one parse call.
        self.crawler.crawl()
        parse_calls = parser.parse.call_count()
        self.assertEqual(parse_calls, 1)

    def test_crawls_all_links(self):
        # Two links on the crawler's own host: three parse calls expected.
        same_host_links = [
            "http://winnie_the_pooh/kanga",
            "http://winnie_the_pooh/tiggers",
        ]
        crawl_result.links = same_host_links
        self.crawler.crawl()
        parse_calls = parser.parse.call_count()
        self.assertEqual(parse_calls, 3)

    def test_does_not_crawl_out_of_domain(self):
        # Links to other hosts must not add any parse calls.
        foreign_links = ["http://kanga.com", "http://roo.com"]
        crawl_result.links = foreign_links
        self.crawler.crawl()
        parse_calls = parser.parse.call_count()
        self.assertEqual(parse_calls, 1)

    def test_recognizes_domain(self):
        # Mix of foreign links and links on the crawler's host (with and
        # without a "www." prefix): three parse calls expected in total.
        mixed_links = [
            "http://kanga.com",
            "http://roo.com",
            "http://www.winnie_the_pooh/tiggers",
            "http://winnie_the_pooh/tiggers",
        ]
        crawl_result.links = mixed_links
        self.crawler.crawl()
        parse_calls = parser.parse.call_count()
        self.assertEqual(parse_calls, 3)

    def test_recognizes_slashslash_domain(self):
        # A scheme-less "//host/path" link: two parse calls expected.
        scheme_less_links = ["//www.winnie_the_pooh/tiggers"]
        crawl_result.links = scheme_less_links
        self.crawler.crawl()
        parse_calls = parser.parse.call_count()
        self.assertEqual(parse_calls, 2)
Ejemplo n.º 5
0
 def test_works_with_url_in_crawl(self):
     # The URL is handed to crawl() rather than to the constructor; one
     # parse call is expected.
     crawler = DomainCrawler()
     crawler.crawl("www.winnie_the_pooh")
     parse_calls = parser.parse.call_count()
     self.assertEqual(parse_calls, 1)
Ejemplo n.º 6
0
 def test_raises_AssertionError_if_no_url_is_present(self):
     # No URL given to the constructor and none passed to crawl():
     # the call is expected to raise AssertionError.
     url_less_crawler = DomainCrawler()
     self.assertRaises(AssertionError, url_less_crawler.crawl)