Example #1
0
    def test__process_html(self):
        """Check how often ``_process_html`` invokes its two per-element handlers.

        ``_process_html_asset`` and ``_process_html_link`` are replaced with
        mocks, so only the number of calls made while processing the
        ``html_test_string`` fixture is verified, not what the handlers do.
        """
        # Name the parser explicitly. Without it, bs4 uses whichever parser
        # happens to be installed (and emits a warning), so the parsed tree
        # could differ from one machine to the next.
        # NOTE(review): the expected counts below are assumed to hold under
        # "html.parser" -- confirm against the fixture.
        soup = BeautifulSoup(self.html_test_string, "html.parser")
        c = Crawler("http://test.com")
        c._process_html_asset = mock.Mock()
        c._process_html_link = mock.Mock()

        c._process_html(soup)
        self.assertEqual(c._process_html_asset.call_count, 3)
        self.assertEqual(c._process_html_link.call_count, 4)
Example #2
0
    def test__process_next_url_blacklist(self):
        """Run ``_process_next_url`` twice with a queued URL that is in ``bad_urls``.

        ``_make_request`` is mocked to return ``None`` and ``_process_html`` is
        mocked so its calls can be counted. After each call the sizes of
        ``process_q`` and ``bad_urls`` are checked, and finally that
        ``_process_html`` was never invoked.
        """
        blacklisted_url = "http://a.com/a/b/c/"

        crawler = Crawler("http://a.com")
        crawler.bad_urls = {blacklisted_url: True}
        crawler.process_q.append(blacklisted_url)

        crawler._make_request = mock.Mock(return_value=None)
        crawler._process_html = mock.Mock()

        # (queue length, blacklist size) expected after each successive call.
        expected_sizes = ((1, 2), (0, 2))
        for queue_size, blacklist_size in expected_sizes:
            crawler._process_next_url()
            self.assertEqual(len(crawler.process_q), queue_size)
            self.assertEqual(len(crawler.bad_urls), blacklist_size)

        self.assertEqual(crawler._process_html.call_count, 0)