Exemple #1
0
 def test_next_next_url_multiple_adds(self):
     """Check next_url() after two extra pages were added to the block.

     The block starts from ".../path/to/location"; "/other/page" and then
     "/other" are added, and next_url() is expected to yield the full URL
     for "/other".
     """
     domain_block = DomainBlock(
         crawler_url.parse_url("https://www.example.com/path/to/location")
     )
     for page in ("/other/page", "/other"):
         domain_block.add_page(page)
     self.assertEqual(domain_block.next_url(), "https://www.example.com/other")
Exemple #2
0
 def test_next_url_empty(self):
     """Check that a second next_url() call on a fresh block is falsy."""
     domain_block = DomainBlock(
         crawler_url.parse_url("https://www.example.com/path/to/location")
     )
     # The first result is deliberately ignored; only the second call is checked.
     domain_block.next_url()
     second_result = domain_block.next_url()
     self.assertFalse(second_result)
Exemple #3
0
 def test_next_url(self):
     """Check that next_url() on a fresh block returns the starting URL."""
     start_url = "https://www.example.com/path/to/location"
     domain_block = DomainBlock(crawler_url.parse_url(start_url))
     self.assertEqual(domain_block.next_url(), start_url)
Exemple #4
0
 def test_add_extension_already_added(self):
     """Check that adding the same page twice reports a falsy result.

     "/other/path" is added once, then added again; the second add_page()
     call must return a falsy value.
     """
     parsed_url = crawler_url.parse_url("https://www.example.com/path/to/location")
     block = DomainBlock(parsed_url)
     block.add_page("/other/path")
     # Only the return value of the duplicate add is asserted; the queue
     # contents are not inspected here.
     self.assertFalse(block.add_page("/other/path"))
Exemple #5
0
 def test_add_extension(self):
     """Check that add_page() puts a new page into pages_to_crawl.

     Both sides are sorted before comparing, so the check does not depend
     on the order in which pages are stored.
     """
     domain_block = DomainBlock(
         crawler_url.parse_url("https://www.example.com/path/to/location")
     )
     domain_block.add_page("/other/path")
     actual_pages = sorted(domain_block.pages_to_crawl)
     expected_pages = sorted(deque(["/path/to/location", "/other/path"]))
     self.assertEqual(actual_pages, expected_pages)
Exemple #6
0
 def test_constructor_extensions_to_crawl_with_path(self):
     """Check pages_to_crawl right after construction from a URL with a path.

     The queue is expected to equal a deque holding only "/path/to/location".
     """
     domain_block = DomainBlock(
         crawler_url.parse_url("https://www.example.com/path/to/location")
     )
     expected_queue = deque(["/path/to/location"])
     self.assertEqual(domain_block.pages_to_crawl, expected_queue)
Exemple #7
0
 def test_constructor_extensions_to_crawl_base_directory(self):
     """Check pages_to_crawl right after construction from a root URL.

     The queue is expected to equal a deque holding only "/".
     """
     domain_block = DomainBlock(crawler_url.parse_url("https://www.example.com/"))
     expected_queue = deque(["/"])
     self.assertEqual(domain_block.pages_to_crawl, expected_queue)