def stop_procs(self):
    """Run the parent ``stop_procs``, then drop the uncrawled URLs.

    A fresh ``DBInterface`` is created from the ``"General"`` section of
    ``self.config`` and handed to ``URLs``, whose ``drop_uncrawled_urls``
    is called as the final step.
    """
    super(Context, self).stop_procs()

    general_config = self.config["General"]
    cleanup_db = DBInterface(config=general_config)
    url_store = URLs(cleanup_db)

    # The drop is deliberately the last step: doing it after everything
    # else prevents race conditions.
    self.logger.info("Dropping uncrawled urls")
    url_store.drop_uncrawled_urls()
Beispiel #2
0
def test_drop_uncrawled_urls_drop_all_but_one(db_interface, url_ids):
    """Exactly one URL is left once a single URL was marked as requested.

    ``db_interface`` and ``url_ids`` are presumably pytest fixtures defined
    elsewhere -- confirm against the project's conftest.
    """
    url_store = URLs(db_interface)
    request_store = Request(db_interface)

    # Only the first id is marked as requested before the drop.
    first_url_id = url_ids[0]
    request_store.mark_as_requested(first_url_id, 200, "wwww.internet.de")

    url_store.drop_uncrawled_urls()

    remaining = count_urls(db_interface)
    assert remaining == 1
Beispiel #3
0
def test_drop_uncrawled_urls_drop_all(db_interface, url_ids):
    """No URL is left when none was marked as requested before the drop.

    ``url_ids`` is not read in the body; it is presumably requested as a
    fixture for its setup side effect -- confirm against the conftest.
    """
    url_store = URLs(db_interface)
    url_store.drop_uncrawled_urls()

    remaining = count_urls(db_interface)
    assert remaining == 0