def test_mailto_url_with_one_broken_link(self):
    """Check the ``mailto:`` URL generated for a single reported dataset.

    One dataset with a maintainer email gets three resources, all of which
    are marked broken.  The report should then hold exactly one entry whose
    ``mailto`` value matches the expected subject and body.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]
    dataset = custom_factories.Dataset(
        maintainer_email="*****@*****.**")
    broken_resources = tuple(
        custom_factories.Resource(package_id=dataset["id"])
        for _ in range(3))
    custom_helpers.make_broken(broken_resources, user)

    report = helpers.call_action(
        "ckanext_deadoralive_broken_links_by_email")

    assert len(report) == 1
    entry = report[0]

    # Build the expected URL piece by piece: dataset page, body, mailto.
    dataset_url = "http://test.ckan.net/dataset/" + dataset["name"]
    body_template = "This dataset contains a broken link:%0A%0A{title}%0A{url}"
    expected_body = body_template.format(
        title=dataset["title"], url=dataset_url)
    expected_mailto = "mailto:{email}?subject={subject}&body={body}".format(
        email=dataset["maintainer_email"],
        subject="You have a dataset with broken links on CKAN",
        body=expected_body)

    assert entry["mailto"] == expected_mailto
def test_with_some_resources_checked_recently_and_some_never(self):
    """Only never-checked resources are returned, oldest resource first.

    Five resources get a result dated 23 hours ago and five more are
    created without any result.  Asking for 10 resources should return
    just the five unchecked ones, in the order they were created.
    """
    recently_checked = [factories.Resource()['id'] for _ in range(5)]
    checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=23)
    for resource_id in recently_checked:
        results.upsert(resource_id, True, last_checked=checked_at)

    never_checked = [factories.Resource()['id'] for _ in range(5)]

    assert results.get_resources_to_check(10) == never_checked
def test_get_resources_to_check(self):
    """Create three resources and expect the action to return three IDs."""
    for _ in range(3):
        custom_factories.Resource()

    returned_ids = helpers.call_action(
        "ckanext_deadoralive_get_resources_to_check")

    assert len(returned_ids) == 3
def test_mix_of_broken_and_working_links(self):
    """Check the by-email report for maintainers with mixed link states.

    Fixture layout (B = made broken, W = made working):

    * maintainer_1: dataset_1 [B], dataset_2 [W]
    * maintainer_2: dataset_3 [B, B]
    * maintainer_3: dataset_4 [B], dataset_5 [B, B, W]

    The report is expected to list three emails, the one with the most
    broken links first.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    # NOTE(review): the three maintainer addresses below are the same
    # literal (they look redacted); the ordering assertions further down
    # presumably need three distinct addresses -- confirm.
    maintainer_1 = "*****@*****.**"
    dataset_1 = custom_factories.Dataset(maintainer_email=maintainer_1)
    resource_1 = custom_factories.Resource(package_id=dataset_1["id"])
    dataset_2 = custom_factories.Dataset(maintainer_email=maintainer_1)
    resource_2 = custom_factories.Resource(package_id=dataset_2["id"])

    maintainer_2 = "*****@*****.**"
    dataset_3 = custom_factories.Dataset(maintainer_email=maintainer_2)
    resource_3 = custom_factories.Resource(package_id=dataset_3["id"])
    resource_4 = custom_factories.Resource(package_id=dataset_3["id"])

    maintainer_3 = "*****@*****.**"
    dataset_4 = custom_factories.Dataset(maintainer_email=maintainer_3)
    resource_5 = custom_factories.Resource(package_id=dataset_4["id"])
    dataset_5 = custom_factories.Dataset(maintainer_email=maintainer_3)
    resource_6 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_7 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_8 = custom_factories.Resource(package_id=dataset_5["id"])

    broken = (resource_1, resource_3, resource_4, resource_5, resource_6,
              resource_7)
    working = (resource_2, resource_8)
    custom_helpers.make_broken(broken, user)
    custom_helpers.make_working(working, user)

    report = helpers.call_action(
        "ckanext_deadoralive_broken_links_by_email")

    assert len(report) == 3, ("There should be 3 emails listed in the "
                              "report")

    reported_emails = [item["email"] for item in report]
    assert reported_emails == [maintainer_3, maintainer_2, maintainer_1], (
        "The items should be sorted most broken datasets first")

    # Check that the num_broken_links for each item is correct.
    broken_counts = [item["num_broken_links"] for item in report]
    assert broken_counts == [3, 2, 1]

    # Check maintainer_3's report in detail.
    broken_datasets = report[0]["datasets_with_broken_links"]
    assert len(broken_datasets) == 2
    assert [dataset["name"] for dataset in broken_datasets] == [
        dataset_5["name"], dataset_4["name"]]

    dataset_5_report = broken_datasets[0]
    assert dataset_5_report["num_broken_links"] == 2
    broken_resource_ids = dataset_5_report["resources_with_broken_links"]
    assert len(broken_resource_ids) == 2
    assert list(broken_resource_ids) == [resource_6["id"], resource_7["id"]]
def test_with_some_resources_checked_recently_and_some_not_recently(self):
    """Stale resources are returned, most-recently-checked last.

    Five resources were checked 23 hours ago; five others were last
    checked between 30 and 34 hours ago.  Asking for 10 resources should
    return only the five stale ones, ordered from oldest check to newest.
    """
    now = datetime.datetime.utcnow()

    recently_checked = [factories.Resource()['id'] for _ in range(5)]
    recent_check = now - datetime.timedelta(hours=23)
    for resource_id in recently_checked:
        results.upsert(resource_id, True, last_checked=recent_check)

    resource_6, resource_7, resource_8, resource_9, resource_10 = [
        factories.Resource()['id'] for _ in range(5)]

    # (resource, hours since last check), deliberately not in creation
    # order so that the sort key must be the check time.
    stale_checks = [
        (resource_7, 34),
        (resource_6, 33),
        (resource_9, 32),
        (resource_10, 31),
        (resource_8, 30),
    ]
    for resource_id, hours_ago in stale_checks:
        results.upsert(
            resource_id, True,
            last_checked=now - datetime.timedelta(hours=hours_ago))

    expected = [resource_id for resource_id, _hours in stale_checks]
    assert results.get_resources_to_check(10) == expected
def test_dataset_with_no_broken_resources(self):
    """The by-email report is empty when no resource is marked broken."""
    package = custom_factories.Dataset()
    custom_factories.Resource(package_id=package["id"])

    report = helpers.call_action(
        "ckanext_deadoralive_broken_links_by_email")

    assert report == []
def test_mix_of_broken_and_working_links(self):
    """Check the by-organization report with mixed link states.

    Fixture layout (B = made broken, W = made working):

    * org_1: dataset_1 [B], dataset_2 [W]
    * org_2: dataset_3 [B, B]
    * org_3: dataset_4 [B], dataset_5 [B, B, W]

    The report is expected to list three organizations, the one with the
    most broken links first.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    org_1 = factories.Organization()
    dataset_1 = custom_factories.Dataset(owner_org=org_1["id"])
    dataset_2 = custom_factories.Dataset(owner_org=org_1["id"])
    resource_1 = custom_factories.Resource(package_id=dataset_1["id"])
    resource_2 = custom_factories.Resource(package_id=dataset_2["id"])

    org_2 = factories.Organization()
    dataset_3 = custom_factories.Dataset(owner_org=org_2["id"])
    resource_3 = custom_factories.Resource(package_id=dataset_3["id"])
    resource_4 = custom_factories.Resource(package_id=dataset_3["id"])

    org_3 = factories.Organization()
    dataset_4 = custom_factories.Dataset(owner_org=org_3["id"])
    resource_5 = custom_factories.Resource(package_id=dataset_4["id"])
    dataset_5 = custom_factories.Dataset(owner_org=org_3["id"])
    resource_6 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_7 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_8 = custom_factories.Resource(package_id=dataset_5["id"])

    broken = (resource_1, resource_3, resource_4, resource_5, resource_6,
              resource_7)
    working = (resource_2, resource_8)
    custom_helpers.make_broken(broken, user)
    custom_helpers.make_working(working, user)

    report = helpers.call_action(
        "ckanext_deadoralive_broken_links_by_organization")

    assert len(report) == 3, ("There should be 3 organizations listed in "
                              "the report")

    reported_orgs = [org["name"] for org in report]
    assert reported_orgs == [org_3["name"], org_2["name"], org_1["name"]], (
        "The organizations should be sorted most broken datasets first")

    # Check that the num_broken_links for each org is correct.
    broken_counts = [org["num_broken_links"] for org in report]
    assert broken_counts == [3, 2, 1]

    # Check org_3's report in detail.
    org_3_broken_datasets = report[0]["datasets_with_broken_links"]
    assert len(org_3_broken_datasets) == 2
    assert [dataset["name"] for dataset in org_3_broken_datasets] == [
        dataset_5["name"], dataset_4["name"]]

    dataset_5_report = org_3_broken_datasets[0]
    assert dataset_5_report["num_broken_links"] == 2
    broken_resource_ids = dataset_5_report["resources_with_broken_links"]
    assert len(broken_resource_ids) == 2
    assert list(broken_resource_ids) == [resource_6["id"], resource_7["id"]]
def test_with_5_new_resources_and_request_10(self):
    """Asking for more resources than exist returns all of them.

    With five never-checked resources and a request for 10, the five
    should come back in creation order (oldest first).
    """
    new_resource_ids = [factories.Resource()['id'] for _ in range(5)]

    assert results.get_resources_to_check(10) == new_resource_ids
def test_custom_longer_pending_since(self):
    """A longer ``pending_since`` hides more recent pending checks.

    A check made pending 3 hours ago must not be returned when
    ``pending_since`` is 4 hours.
    """
    resource_id = factories.Resource()['id']
    pending_at = datetime.datetime.utcnow() - datetime.timedelta(hours=3)
    results._make_pending([resource_id], pending_at)

    to_check = results.get_resources_to_check(
        10, pending_since=datetime.timedelta(hours=4))

    assert to_check == []
def test_custom_longer_since(self):
    """A longer ``since`` hides resources that were checked more recently.

    A resource last checked 30 hours ago must not be returned when
    ``since`` is 48 hours.
    """
    resource_id = factories.Resource()['id']
    checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=30)
    results.upsert(resource_id, True, last_checked=checked_at)

    to_check = results.get_resources_to_check(
        10, since=datetime.timedelta(hours=48))

    assert to_check == []
def test_custom_shorter_pending_since(self):
    """A shorter ``pending_since`` exposes more recent pending checks.

    A check made pending 1 hour ago must be returned when
    ``pending_since`` is half an hour.
    """
    resource_id = factories.Resource()['id']
    pending_at = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
    results._make_pending([resource_id], pending_at)

    to_check = results.get_resources_to_check(
        10, pending_since=datetime.timedelta(hours=0.5))

    assert len(to_check) == 1
    assert to_check[0] == resource_id
def test_custom_shorter_since(self):
    """A shorter ``since`` exposes resources that were checked recently.

    A resource last checked 10 hours ago must be returned when ``since``
    is 5 hours.
    """
    resource_id = factories.Resource()['id']
    checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=10)
    results.upsert(resource_id, True, last_checked=checked_at)

    to_check = results.get_resources_to_check(
        10, since=datetime.timedelta(hours=5))

    assert len(to_check) == 1
    assert to_check[0] == resource_id
def test_unicode(self):
    """Smoke test: the by-email report copes with non-ASCII dataset fields.

    There are no assertions; the test passes if the action call does not
    raise.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    dataset_fields = {
        "title": u"ötåeåst",
        "maintainer_email": u"ötåeå[email protected]",
        "maintainer": u"ötåeåst_maintainer",
    }
    dataset = custom_factories.Dataset(**dataset_fields)
    broken_resource = custom_factories.Resource(package_id=dataset["id"])
    custom_helpers.make_broken((broken_resource,), user)

    helpers.call_action("ckanext_deadoralive_broken_links_by_email")
def test_unicode(self):
    """Smoke test: the by-organization report copes with non-ASCII input.

    The organization, dataset and resource all carry non-ASCII text.
    There are no assertions; the test passes if the action call does not
    raise.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    organization = factories.Organization(title=u'Test Örgänißation')
    dataset = custom_factories.Dataset(
        owner_org=organization['id'],
        title=u'Test Dätaßet')
    broken_resource = custom_factories.Resource(
        package_id=dataset["id"],
        name=u'Test Rëßource',
        url=u'http://bröken_link')
    custom_helpers.make_broken((broken_resource,), user)

    helpers.call_action("ckanext_deadoralive_broken_links_by_organization")
def test_dataset_with_no_email(self):
    """Datasets without any email are grouped under ``email: None``.

    Three datasets are created without author or maintainer details, each
    with one broken resource.  The report should contain a single entry
    with ``email`` set to None that lists all three datasets.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    # Datasets first, then one resource per dataset; every resource is
    # made broken so that each dataset shows up in the report.
    datasets = [custom_factories.Dataset() for _ in range(3)]
    broken_resources = tuple(
        custom_factories.Resource(package_id=dataset["id"])
        for dataset in datasets)
    custom_helpers.make_broken(broken_resources, user=user)

    report = helpers.call_action(
        "ckanext_deadoralive_broken_links_by_email")

    assert len(report) == 1
    entry = report[0]
    assert entry["email"] is None
    assert entry["num_broken_links"] == 3

    reported_datasets = entry["datasets_with_broken_links"]
    assert len(reported_datasets) == 3
    reported_names = [dataset["name"] for dataset in reported_datasets]
    for dataset in datasets:
        assert dataset["name"] in reported_names
def test_that_it_does_return_resources_with_expired_pending_checks(self):
    """Resources whose pending check is > 2 hours old are returned.

    Five resources are made pending as of five hours ago; all five should
    come back, in the order they were created.
    """
    expired_ids = [factories.Resource()['id'] for _ in range(5)]
    pending_at = datetime.datetime.utcnow() - datetime.timedelta(hours=5)
    # Hand over a copy so the expected list below stays independent.
    results._make_pending(list(expired_ids), pending_at)

    assert results.get_resources_to_check(10) == expired_ids
def test_when_all_resources_have_been_checked_recently(self):
    """Nothing is returned when every resource was checked within 24 hours.

    All five resources get a result dated 23 hours ago, so a request for
    10 resources should yield an empty list.
    """
    resource_ids = [factories.Resource()['id'] for _ in range(5)]
    checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=23)
    for resource_id in resource_ids:
        results.upsert(resource_id, True, last_checked=checked_at)

    assert results.get_resources_to_check(10) == []
def test_broken_links_by_organization_with_unicode(self):
    """Smoke test: the organization report page handles non-ASCII text.

    There are no assertions; the test passes if requesting the page does
    not raise.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    organization = factories.Organization(title=u"Test Örganißation")
    dataset = custom_factories.Dataset(
        owner_org=organization["id"],
        title=u"Test Dätaßet")
    broken_resource = custom_factories.Resource(
        package_id=dataset["id"],
        name=u"Test Rëßource",
        url=u"http://bröken_link")
    custom_helpers.make_broken((broken_resource,), user=user)

    self.app.get("/organization/broken_links")
def test_with_10_new_resources_and_request_5(self):
    """Only the oldest resources are returned when fewer are requested.

    Ten never-checked resources exist and five are requested (passing the
    configured organization filter); the five created first should come
    back in creation order.
    """
    all_resource_ids = [factories.Resource()['id'] for _ in range(10)]
    oldest_five = all_resource_ids[:5]

    organization_filter = config.organization_to_filter
    to_check = results.get_resources_to_check(5, organization_filter)

    assert to_check == oldest_five
def test_broken_links_by_email_with_unicode(self):
    """Smoke test: the admin broken-links page handles non-ASCII text.

    One dataset has a non-ASCII maintainer, the other a non-ASCII author;
    each gets one broken resource.  There are no assertions; the test
    passes if requesting the page as a sysadmin does not raise.
    """
    sysadmin = custom_factories.Sysadmin()
    extra_environ = {'REMOTE_USER': str(sysadmin["name"])}

    maintainer_fields = {
        "maintainer": u"Mäintainer",
        "maintainer_email": u"mä[email protected]",
    }
    author_fields = {
        "author": u"Aüthör",
        "author_email": u"aüthö[email protected]",
    }
    dataset_1 = custom_factories.Dataset(
        title=u"Test Dätaßet", **maintainer_fields)
    dataset_2 = custom_factories.Dataset(
        title=u"Test Dätaßet", **author_fields)

    broken_resources = tuple(
        custom_factories.Resource(
            package_id=dataset["id"],
            name=u"Test Rësourße",
            url=u"http://bröken_link")
        for dataset in (dataset_1, dataset_2))
    custom_helpers.make_broken(broken_resources, user=sysadmin)

    self.app.get("/ckan-admin/broken_links", extra_environ=extra_environ)
def test_upsert(self):
    """Store a result with the upsert action and read it back with get."""
    resource_id = factories.Resource()["id"]

    helpers.call_action(
        "ckanext_deadoralive_upsert",
        resource_id=resource_id,
        alive=True,
        status=200,
        reason="OK")
    stored = helpers.call_action(
        "ckanext_deadoralive_get", resource_id=resource_id)

    assert stored["resource_id"] == resource_id
    assert stored["alive"] is True
    assert stored["status"] == 200
    assert stored["reason"] == "OK"
def test_that_it_creates_pending_checks(self):
    """Every resource handed out by get_resources_to_check() is pending.

    Three resources are set up: one never checked, one checked 30 hours
    ago and one with a pending check from 3 hours ago.  After calling
    get_resources_to_check(), each should have ``pending`` set to True.
    """
    # A resource that has never been checked.
    never_checked = factories.Resource()['id']

    # A resource that was checked > 24 hours ago.
    stale = factories.Resource()['id']
    results.upsert(
        stale, True,
        last_checked=datetime.datetime.utcnow() - datetime.timedelta(
            hours=30))

    # A resource with a pending check from > 2 hours ago.
    expired_pending = factories.Resource()['id']
    results._make_pending(
        [expired_pending],
        datetime.datetime.utcnow() - datetime.timedelta(hours=3))

    results.get_resources_to_check(10)

    for resource_id in (never_checked, stale, expired_pending):
        assert results.get(resource_id)["pending"] is True
def test_upsert_with_unicode(self):
    """A non-ASCII ``reason`` survives the upsert/get round trip."""
    resource_id = factories.Resource()["id"]

    helpers.call_action(
        "ckanext_deadoralive_upsert",
        resource_id=resource_id,
        alive=True,
        status=200,
        reason=u"Alleß ökäy!")
    stored = helpers.call_action(
        "ckanext_deadoralive_get", resource_id=resource_id)

    assert stored["resource_id"] == resource_id
    assert stored["alive"] is True
    assert stored["status"] == 200
    assert stored["reason"] == u"Alleß ökäy!"
def test_broken_links_by_email(self):
    """The admin broken-links page lists maintainers and broken datasets.

    Fixture layout (B = made broken, W = made working):

    * maintainer_1: dataset_1 [B], dataset_2 [W]
    * maintainer_2: dataset_3 [B, B]
    * maintainer_3: dataset_4 [B], dataset_5 [B, B, W]

    Every maintainer and every dataset with a broken link should appear
    in the response; dataset_2 (working link only) should not.
    """
    sysadmin = custom_factories.Sysadmin()
    extra_environ = {'REMOTE_USER': str(sysadmin["name"])}

    maintainer_1 = "*****@*****.**"
    dataset_1 = custom_factories.Dataset(maintainer_email=maintainer_1)
    resource_1 = custom_factories.Resource(package_id=dataset_1["id"])
    dataset_2 = custom_factories.Dataset(maintainer_email=maintainer_1)
    resource_2 = custom_factories.Resource(package_id=dataset_2["id"])

    maintainer_2 = "*****@*****.**"
    dataset_3 = custom_factories.Dataset(maintainer_email=maintainer_2)
    resource_3 = custom_factories.Resource(package_id=dataset_3["id"])
    resource_4 = custom_factories.Resource(package_id=dataset_3["id"])

    maintainer_3 = "*****@*****.**"
    dataset_4 = custom_factories.Dataset(maintainer_email=maintainer_3)
    resource_5 = custom_factories.Resource(package_id=dataset_4["id"])
    dataset_5 = custom_factories.Dataset(maintainer_email=maintainer_3)
    resource_6 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_7 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_8 = custom_factories.Resource(package_id=dataset_5["id"])

    broken = (resource_1, resource_3, resource_4, resource_5, resource_6,
              resource_7)
    working = (resource_2, resource_8)
    custom_helpers.make_broken(broken, user=sysadmin)
    custom_helpers.make_working(working, user=sysadmin)

    response = self.app.get("/ckan-admin/broken_links",
                            extra_environ=extra_environ)

    for maintainer in (maintainer_1, maintainer_2, maintainer_3):
        assert maintainer in response

    assert dataset_1["name"] in response
    assert dataset_2["name"] not in response
    for dataset in (dataset_3, dataset_4, dataset_5):
        assert dataset["name"] in response
def test_broken_links_by_organization(self):
    """The organization broken-links page lists orgs and broken datasets.

    Fixture layout (B = made broken, W = made working):

    * org_1: dataset_1 [B], dataset_2 [W]
    * org_2: dataset_3 [B, B]
    * org_3: dataset_4 [B], dataset_5 [B, B, W]

    Every organization and every dataset with a broken link should appear
    in the response; dataset_2 (working link only) should not.
    """
    user = factories.User()
    config.authorized_users = [user["name"]]

    org_1 = factories.Organization()
    dataset_1 = custom_factories.Dataset(owner_org=org_1["id"])
    dataset_2 = custom_factories.Dataset(owner_org=org_1["id"])
    resource_1 = custom_factories.Resource(package_id=dataset_1["id"])
    resource_2 = custom_factories.Resource(package_id=dataset_2["id"])

    org_2 = factories.Organization()
    dataset_3 = custom_factories.Dataset(owner_org=org_2["id"])
    resource_3 = custom_factories.Resource(package_id=dataset_3["id"])
    resource_4 = custom_factories.Resource(package_id=dataset_3["id"])

    org_3 = factories.Organization()
    dataset_4 = custom_factories.Dataset(owner_org=org_3["id"])
    resource_5 = custom_factories.Resource(package_id=dataset_4["id"])
    dataset_5 = custom_factories.Dataset(owner_org=org_3["id"])
    resource_6 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_7 = custom_factories.Resource(package_id=dataset_5["id"])
    resource_8 = custom_factories.Resource(package_id=dataset_5["id"])

    broken = (resource_1, resource_3, resource_4, resource_5, resource_6,
              resource_7)
    working = (resource_2, resource_8)
    custom_helpers.make_broken(broken, user=user)
    custom_helpers.make_working(working, user=user)

    response = self.app.get("/organization/broken_links")

    for organization in (org_1, org_2, org_3):
        assert organization["name"] in response

    assert dataset_1["name"] in response
    assert dataset_2["name"] not in response
    for dataset in (dataset_3, dataset_4, dataset_5):
        assert dataset["name"] in response
def test(self):
    """Test that deadoralive and ckanext-deadoralive work together.

    Add some resources with working and some with broken links to CKAN,
    run deadoralive against mocked HTTP endpoints, then check that it
    added the right results.
    """
    results.create_database_table()
    user = factories.User()

    # FunctionalTestBaseClass has already made self.app for us, but we
    # need one with our authorized_users config setting in it so replace it
    # with our own.
    config["ckanext.deadoralive.authorized_users"] = user["name"]
    self.app = custom_helpers._get_test_app()

    # The URL of the CKAN site we'll be using.
    # We'll be mocking the URLs on this domain that we expect to be sending
    # requests to.
    ckan_url = "http://test.ckan.org"

    # Mock some working and some broken resource URLs.
    # We'll create resources with these URLs in CKAN below.
    url_1 = "http://demo.ckan.org/url_1"
    httpretty.register_uri(httpretty.GET, url_1, status=200)
    url_2 = "http://demo.ckan.org/url_2"
    httpretty.register_uri(httpretty.GET, url_2, status=500)
    url_3 = "http://demo.ckan.org/url_3"
    httpretty.register_uri(httpretty.GET, url_3, status=200)

    # We're also going to mock the CKAN API URLs that deadoralive will be
    # requesting. We'll catch these requests and then forward them to a CKAN
    # test app.
    # FIXME: It would be nice if we could just mock http://test.ckan.org/*
    # and forward all requests on to the test app, but I don't think
    # httpretty supports this.
    api_endpoints = (
        "get_resources_to_check",
        "get_url_for_resource_id",
        "upsert",
    )
    for endpoint in api_endpoints:
        api_url = ckan_url + "/deadoralive/" + endpoint
        for http_method in (httpretty.GET, httpretty.POST):
            httpretty.register_uri(http_method, api_url,
                                   body=self._forward_to_test_app)

    # Create the resources in CKAN whose links will be checked.
    resource_1 = custom_factories.Resource(url=url_1)
    resource_2 = custom_factories.Resource(url=url_2)
    resource_3 = custom_factories.Resource(url=url_3)

    # Call deadoralive: It should get the IDs of the three resources from
    # CKAN, get each resource's URL from CKAN, test each URL, and then post
    # the test results back to CKAN.
    # The argument list is built directly rather than by splitting a
    # formatted string, so a value containing whitespace cannot be broken
    # into several arguments.
    before = datetime.datetime.utcnow()
    deadoralive.deadoralive.main(
        ["--url", ckan_url, "--apikey", user["apikey"]])
    after = datetime.datetime.utcnow()

    # Format of the timestamp strings in the stored results.
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%f"

    # Now check that the links were checked and the correct results were
    # saved in ckanext-deadoralive's database table.

    # First check the two resources with working links.
    for resource in (resource_1, resource_3):
        result = helpers.call_action("ckanext_deadoralive_get",
                                     resource_id=resource["id"])
        assert result["resource_id"] == resource["id"]
        assert result["alive"] is True
        last_checked = datetime.datetime.strptime(
            result["last_checked"], timestamp_format)
        assert before < last_checked < after
        last_successful = datetime.datetime.strptime(
            result["last_successful"], timestamp_format)
        assert before < last_successful < after
        assert result["num_fails"] == 0
        assert result["pending"] is False
        assert result["pending_since"] is None
        assert result["status"] == 200
        assert result["reason"] == "OK"

    # Now check the expected result for the resource with a broken link.
    result = helpers.call_action("ckanext_deadoralive_get",
                                 resource_id=resource_2["id"])
    assert result["resource_id"] == resource_2["id"]
    assert result["alive"] is False
    last_checked = datetime.datetime.strptime(
        result["last_checked"], timestamp_format)
    assert before < last_checked < after
    assert result["last_successful"] is None
    assert result["num_fails"] == 1
    assert result["pending"] is False
    assert result["pending_since"] is None
    assert result["status"] == 500
    assert result["reason"] == "Internal Server Error"
def test_that_it_does_not_return_resources_with_pending_checks(self):
    """Resources with pending checks < 2 hours old are not returned.

    Three groups of five resources are created: checked 20 hours ago,
    made pending one hour ago, and last checked 31 to 35 hours ago.  A
    request for 10 resources should return only the last group, ordered
    from oldest check to newest.
    """
    now = datetime.datetime.utcnow()

    # Five resources that have been checked in the last 24 hours.
    recently_checked = [factories.Resource()['id'] for _ in range(5)]
    twenty_hours_ago = now - datetime.timedelta(hours=20)
    for resource_id in recently_checked:
        results.upsert(resource_id, True, last_checked=twenty_hours_ago)

    # Five resources with pending checks from < 2 hours ago.
    recently_pending = [factories.Resource()['id'] for _ in range(5)]
    one_hour_ago = now - datetime.timedelta(hours=1)
    results._make_pending(recently_pending, one_hour_ago)

    # Five resources that were last checked more than 24 hours ago, the
    # first one created having the oldest check.
    stale = [factories.Resource()['id'] for _ in range(5)]
    for hours_ago, resource_id in zip((35, 34, 33, 32, 31), stale):
        results.upsert(
            resource_id, True,
            last_checked=now - datetime.timedelta(hours=hours_ago))

    assert results.get_resources_to_check(10) == stale