def test_next_next_url_multiple_adds(self):
    """next_url() after adding "/other/page" then "/other" yields the "/other" URL."""
    start = crawler_url.parse_url("https://www.example.com/path/to/location")
    domain_block = DomainBlock(start)
    # Queue the extra pages in the same order the scenario requires.
    for page in ("/other/page", "/other"):
        domain_block.add_page(page)
    self.assertEqual(domain_block.next_url(), "https://www.example.com/other")
def test_next_url_empty(self):
    """A second next_url() call on a block built from one URL returns a falsy value."""
    domain_block = DomainBlock(
        crawler_url.parse_url("https://www.example.com/path/to/location")
    )
    # The first result is discarded; only the follow-up call is under test.
    domain_block.next_url()
    leftover = domain_block.next_url()
    self.assertFalse(leftover)
def test_next_url(self):
    """next_url() on a freshly built block returns the URL it was built from."""
    domain_block = DomainBlock(
        crawler_url.parse_url("https://www.example.com/path/to/location")
    )
    upcoming = domain_block.next_url()
    self.assertEqual(upcoming, "https://www.example.com/path/to/location")
def test_add_extension_already_added(self):
    """Adding a page that was already added makes add_page() return a falsy value."""
    parsed_url = crawler_url.parse_url("https://www.example.com/path/to/location")
    block = DomainBlock(parsed_url)
    block.add_page("/other/path")
    # Only the return value of the duplicate add is checked here; the queue
    # contents are not inspected by this test.
    self.assertFalse(block.add_page("/other/path"))
def test_add_extension(self):
    """After add_page(), pages_to_crawl holds the original path and the new page."""
    domain_block = DomainBlock(
        crawler_url.parse_url("https://www.example.com/path/to/location")
    )
    domain_block.add_page("/other/path")
    expected_pages = deque(["/path/to/location", "/other/path"])
    # Both sides are sorted so the comparison ignores queue order.
    self.assertEqual(sorted(domain_block.pages_to_crawl), sorted(expected_pages))
def test_constructor_extensions_to_crawl_with_path(self):
    """The constructor seeds pages_to_crawl with the path of the given URL."""
    domain_block = DomainBlock(
        crawler_url.parse_url("https://www.example.com/path/to/location")
    )
    expected_pages = deque(["/path/to/location"])
    self.assertEqual(domain_block.pages_to_crawl, expected_pages)
def test_constructor_extensions_to_crawl_base_directory(self):
    """A URL ending in "/" seeds pages_to_crawl with the single entry "/"."""
    domain_block = DomainBlock(crawler_url.parse_url("https://www.example.com/"))
    expected_pages = deque(["/"])
    self.assertEqual(domain_block.pages_to_crawl, expected_pages)