def test__process_html(self):
    """Check how often ``_process_html`` hands work to its two helpers.

    Both ``_process_html_asset`` and ``_process_html_link`` are replaced
    with mocks, so only the number of delegations is verified: three
    asset calls and four link calls for the markup stored in
    ``self.html_test_string``.
    """
    parsed_markup = BeautifulSoup(self.html_test_string)
    crawler = Crawler("http://test.com")

    # Stub out the per-element handlers so that only call counts matter.
    asset_handler = mock.Mock()
    link_handler = mock.Mock()
    crawler._process_html_asset = asset_handler
    crawler._process_html_link = link_handler

    crawler._process_html(parsed_markup)

    self.assertEqual(asset_handler.call_count, 3)
    self.assertEqual(link_handler.call_count, 4)
def test__process_next_url_blacklist(self):
    """Exercise ``_process_next_url`` when a queued URL is in ``bad_urls``.

    ``_make_request`` is mocked to return ``None`` and ``_process_html``
    is mocked so that any call to it can be counted. The test runs
    ``_process_next_url`` twice, checking the sizes of ``process_q`` and
    ``bad_urls`` after each run, and finally that ``_process_html`` was
    never invoked.
    """
    blacklisted_url = "http://a.com/a/b/c/"

    crawler = Crawler("http://a.com")
    crawler.bad_urls = {blacklisted_url: True}
    crawler.process_q.append(blacklisted_url)

    # Every request yields None; the HTML handler is a mock so calls to it
    # can be counted at the end.
    crawler._make_request = mock.Mock(return_value=None)
    html_handler = mock.Mock()
    crawler._process_html = html_handler

    # First pass: one entry remains queued and bad_urls has grown to two.
    # NOTE(review): presumably the constructor queues the start URL and the
    # None response gets it blacklisted -- confirm against Crawler.
    crawler._process_next_url()
    self.assertEqual(len(crawler.process_q), 1)
    self.assertEqual(len(crawler.bad_urls), 2)

    # Second pass: the queue drains while bad_urls stays the same size.
    crawler._process_next_url()
    self.assertEqual(len(crawler.process_q), 0)
    self.assertEqual(len(crawler.bad_urls), 2)

    self.assertEqual(html_handler.call_count, 0)