def test_next_url_new_domain_block(self):
    """URLs on two different hosts are handed out in scheduling order."""
    first_host_url = "https://my.example.com/"
    second_host_url = "https://www.example.com/"

    # Per the original note, the constructor itself calls schedule_url
    # for the URL it is given, so only the second URL is queued by hand.
    scheduler = Scheduler(first_host_url)
    scheduler.schedule_url(second_host_url)

    # Each next_url() call must yield the next URL in the expected order.
    for expected_url in (first_host_url, second_host_url):
        self.assertEqual(scheduler.next_url(), expected_url)
def test_schedule_url_already_crawled(self):
    """Re-scheduling a URL already handed out by next_url() is refused."""
    crawled_url = "https://my.example.com/path/to/location"

    # Per the original note, the constructor itself calls schedule_url
    # for the URL it is given.
    scheduler = Scheduler(crawled_url)

    # Consume the only queued URL so that it counts as already handed out.
    scheduler.next_url()

    # A second attempt to schedule the same URL must report a falsy result.
    rescheduled = scheduler.schedule_url(crawled_url)
    self.assertFalse(rescheduled)
def test_next_url_extensions(self):
    """A URL with query string and fragment is returned unmodified."""
    # Per the original note, the constructor itself calls schedule_url
    # for the URL it is given.
    scheduler = Scheduler("https://my.example.com/path/to/location")

    extended_url = "https://my.example.com/path/to/location?key=val;word=bird#frag"
    scheduler.schedule_url(extended_url)

    # NOTE(review): the seed URL is never dequeued before this assertion,
    # so the test only passes if the scheduler hands out the extended URL
    # on the first next_url() call -- confirm that ordering is intended.
    self.assertEqual(scheduler.next_url(), extended_url)
def test_next_url_empty_queue(self):
    """next_url() yields a falsy value once every URL has been handed out."""
    # Per the original note, the constructor itself calls schedule_url
    # for the URL it is given, so exactly one URL is queued here.
    scheduler = Scheduler("https://my.example.com/path/to/location")

    # Drain the single queued URL.
    scheduler.next_url()

    # Nothing is left, so the next request must come back falsy.
    leftover = scheduler.next_url()
    self.assertFalse(leftover)
def test_next_url(self):
    """The URL passed to the constructor is the first one returned."""
    seed_url = "https://my.example.com/path/to/location"

    # Per the original note, the constructor itself calls schedule_url
    # for the URL it is given, so no explicit scheduling is needed.
    scheduler = Scheduler(seed_url)

    self.assertEqual(scheduler.next_url(), seed_url)