Ejemplo n.º 1
0
def upsert(context, data_dict, last_checked=None):
    """Record the outcome of a link check for one resource.

    Access is verified with the ``ckanext_deadoralive_upsert`` auth check
    and the values are then handed to ``results.upsert()``.

    :param resource_id: the id of the resource that was checked (required)
    :type resource_id: string

    :param alive: whether or not the link was found to be alive (required)
    :type alive: bool

    :param status: optional HTTP status code seen when the resource was
        checked, e.g. 200, 404 or 500
    :type status: int

    :param reason: optional reason for the failed or successful check,
        e.g. "OK", "Not Found", "Internal Server Error"
    :type reason: string

    ``last_checked`` is not read from ``data_dict``; it is a keyword
    argument of this function that is forwarded unchanged to
    ``results.upsert()``.

    No validation is done here yet: a missing ``resource_id`` or ``alive``
    key surfaces as the lookup error raised by ``data_dict[...]``.

    """
    toolkit.check_access("ckanext_deadoralive_upsert", context, data_dict)

    # TODO: Validation.

    results.upsert(
        data_dict["resource_id"],
        data_dict["alive"],
        status=data_dict.get("status"),
        reason=data_dict.get("reason"),
        last_checked=last_checked)
Ejemplo n.º 2
0
    def test_with_some_resources_checked_recently_and_some_never(self):
        """Never-checked resources are returned, recently checked ones are not.

        Five resources get a result dated 23 hours ago and five more are
        created without any result.  Asking for 10 resources to check should
        return exactly the five unchecked ones, in the order in which they
        were created.

        """
        checked_ids = [factories.Resource()['id'] for _ in range(5)]
        recently = datetime.datetime.utcnow() - datetime.timedelta(hours=23)
        for checked_id in checked_ids:
            results.upsert(checked_id, True, last_checked=recently)
        unchecked_ids = [factories.Resource()['id'] for _ in range(5)]

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == unchecked_ids
Ejemplo n.º 3
0
    def test_update_with_unicode(self):
        """Updating an existing row with a non-ASCII reason stores it intact."""
        resource_id = "test_resource_id"
        results.upsert(resource_id, True, status=200, reason="OK")

        results.upsert(resource_id, False, status=404, reason=u"Föoßär")

        stored = results.get(resource_id)
        assert stored["reason"] == u"Föoßär"
Ejemplo n.º 4
0
    def test_update_with_no_status_or_reason_clears(self):
        """A second upsert() without status or reason should null out both."""
        resource_id = "test_resource_id"
        results.upsert(resource_id, True, status=200, reason="OK")

        results.upsert(resource_id, False)

        stored = results.get(resource_id)
        for field in ("status", "reason"):
            assert stored[field] is None
Ejemplo n.º 5
0
    def test_update_replacing_status_and_reason(self):
        """New status and reason values given to upsert() replace the old."""
        resource_id = "test_resource_id"
        results.upsert(resource_id, True, status=200, reason="OK")

        results.upsert(resource_id, False, status=404, reason="Not Found")

        stored = results.get(resource_id)
        assert stored["status"] == 404
        assert stored["reason"] == "Not Found"
Ejemplo n.º 6
0
    def test_custom_longer_since(self):
        """A longer ``since`` should leave out a resource checked within it.

        The resource was checked 30 hours ago; with ``since`` set to 48 hours
        nothing should be returned.

        """
        resource_id = factories.Resource()['id']
        checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=30)
        results.upsert(resource_id, True, last_checked=checked_at)

        window = datetime.timedelta(hours=48)
        to_check = results.get_resources_to_check(10, since=window)

        assert to_check == []
Ejemplo n.º 7
0
    def test_custom_shorter_since(self):
        """A shorter ``since`` should bring back a more recently checked one.

        The resource was checked 10 hours ago; with ``since`` set to 5 hours
        it should be the single resource returned.

        """
        resource_id = factories.Resource()['id']
        checked_at = datetime.datetime.utcnow() - datetime.timedelta(hours=10)
        results.upsert(resource_id, True, last_checked=checked_at)

        window = datetime.timedelta(hours=5)
        to_check = results.get_resources_to_check(10, since=window)

        assert len(to_check) == 1
        assert to_check[0] == resource_id
Ejemplo n.º 8
0
    def test_pending_result_does_not_change_num_fails(self):
        """Marking an existing row as pending should leave its history alone.

        A row that already holds one success followed by three failures is
        flagged as pending.  ``num_fails``, ``last_successful``,
        ``last_checked`` and ``alive`` are expected to stay as they were,
        while ``pending`` becomes True and ``pending_since`` falls between
        the timestamps taken around the ``_make_pending()`` call.

        """
        resource_id = "test_resource_id"

        # One successful check, then three failures in a row.
        results.upsert(resource_id, True)
        last_successful = results.get(resource_id)["last_successful"]
        for _ in range(3):
            results.upsert(resource_id, False)
        num_fails = results.get(resource_id)["num_fails"]
        last_checked = results.get(resource_id)["last_checked"]

        before = datetime.datetime.utcnow()
        results._make_pending([resource_id])
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)
        assert row["num_fails"] == num_fails
        assert row["last_successful"] == last_successful
        assert row["last_checked"] == last_checked
        assert row["alive"] is False
        assert row["pending"] is True
        pending_since = strptime(row["pending_since"])
        assert before < pending_since < after
Ejemplo n.º 9
0
 def test_insert_result_with_status_and_reason(self):
     """Test creating a new result row with a status and reason."""
     resource_id = "test_resource_id"
     reason = "Internal Server Error"

     before = datetime.datetime.utcnow()
     results.upsert(resource_id, False, status=500, reason=reason)
     after = datetime.datetime.utcnow()

     row = results.get(resource_id)
     assert row["resource_id"] == resource_id
     assert row["alive"] is False
     last_checked = strptime(row["last_checked"])
     assert before < last_checked < after
     assert row["last_successful"] is None
     assert row["num_fails"] == 1
     assert row["pending"] is False
     assert row["pending_since"] is None
     assert row["status"] == 500
     assert row["reason"] == reason
Ejemplo n.º 10
0
    def test_reset_pending_status(self):
        """A new result should clear ``pending`` and ``pending_since``.

        A pending row is written straight to the database, then a successful
        result is upserted for the same resource.  Afterwards the stored row
        is expected to have ``pending`` False and ``pending_since`` None.
        Only the successful-result path is exercised here.

        """
        import datetime

        import ckan.model

        pending_row = results._LinkCheckerResult(
            "test_resource_id", None, pending=True)
        # Assign both pending fields explicitly so the row is known to carry
        # a pending timestamp before upsert() runs.  A bare
        # ``pending_row.pending_since`` expression would only read the
        # attribute without storing anything.
        pending_row.pending = True
        pending_row.pending_since = datetime.datetime.utcnow()
        ckan.model.Session.add(pending_row)
        ckan.model.Session.commit()

        results.upsert("test_resource_id", True)

        result = results.get("test_resource_id")

        assert result["pending"] is False
        assert result["pending_since"] is None
Ejemplo n.º 11
0
    def test_insert_failed_result(self):
        """First-ever check of a resource whose link turns out to be broken."""
        resource_id = "test_resource_id"

        before = datetime.datetime.utcnow()
        results.upsert(resource_id, False)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)
        assert row["resource_id"] == resource_id
        assert row["alive"] is False
        last_checked = strptime(row["last_checked"])
        assert before < last_checked < after
        assert row["last_successful"] is None
        assert row["num_fails"] == 1
        assert row["pending"] is False
        assert row["pending_since"] is None

        # Neither status nor reason was given to upsert(), so both are
        # expected to come back as None.
        assert row["status"] is None
        assert row["reason"] is None
Ejemplo n.º 12
0
    def test_update_with_failed_result(self):
        """An existing successful row is updated with a new failed result."""
        resource_id = "test_resource_id"
        results.upsert(resource_id, True)

        before = datetime.datetime.utcnow()
        results.upsert(resource_id, False)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)
        assert row["resource_id"] == resource_id
        assert row["alive"] is False
        last_checked = strptime(row["last_checked"])
        assert before < last_checked < after
        # The only success happened before the ``before`` timestamp was taken.
        assert strptime(row["last_successful"]) < before
        assert row["num_fails"] == 1
        assert row["pending"] is False
        assert row["pending_since"] is None

        # Neither status nor reason was given to upsert(), so both are
        # expected to come back as None.
        assert row["status"] is None
        assert row["reason"] is None
Ejemplo n.º 13
0
    def test_make_pending_does_not_change_status_or_reason(self):
        """Flagging a row as pending should keep its status and reason.

        A pending flag only announces that a fresh result is expected soon;
        the values recorded by the previous check are expected to survive.

        """
        resource_id = "test_resource_id"
        results.upsert(resource_id, True, status=200, reason="OK")
        last_successful = results.get(resource_id)["last_successful"]
        results.upsert(resource_id, False, status=401, reason="Unauthorized")
        last_checked = results.get(resource_id)["last_checked"]

        results._make_pending([resource_id])

        row = results.get(resource_id)
        assert row["num_fails"] == 1
        assert row["last_successful"] == last_successful
        assert row["last_checked"] == last_checked
        assert row["alive"] is False
        assert row["pending"] is True
        assert row["status"] == 401
        assert row["reason"] == "Unauthorized"
Ejemplo n.º 14
0
    def test_when_all_resources_have_been_checked_recently(self):
        """Nothing is returned when every resource was checked recently.

        All five resources get a result dated 23 hours ago, so asking for 10
        resources to check should produce an empty list.

        """
        resource_ids = [factories.Resource()['id'] for _ in range(5)]
        recently = datetime.datetime.utcnow() - datetime.timedelta(hours=23)
        for resource_id in resource_ids:
            results.upsert(resource_id, True, last_checked=recently)

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == []
Ejemplo n.º 15
0
    def test_that_it_creates_pending_checks(self):
        """Every resource handed out for checking should end up pending.

        get_resources_to_check() is called with the organization filter taken
        from ``config.organization_to_filter``; afterwards each of the three
        resources set up here is expected to have ``pending`` set to True.

        """
        utcnow = datetime.datetime.utcnow
        hours = datetime.timedelta

        # Never checked at all.
        never_checked = factories.Resource()['id']

        # Last checked 30 hours ago.
        stale = factories.Resource()['id']
        results.upsert(stale, True, last_checked=utcnow() - hours(hours=30))

        # Already marked pending, three hours ago.
        stuck_pending = factories.Resource()['id']
        results._make_pending([stuck_pending], utcnow() - hours(hours=3))

        results.get_resources_to_check(10, config.organization_to_filter)

        for resource_id in (never_checked, stale, stuck_pending):
            assert results.get(resource_id)["pending"] is True
Ejemplo n.º 16
0
    def test_that_it_creates_pending_checks(self):
        """Every resource handed out for checking should end up pending.

        After get_resources_to_check() has run, each of the three resources
        set up here is expected to have ``pending`` set to True.

        """
        # Never checked at all.
        never_checked = factories.Resource()['id']

        # Last checked 30 hours ago.
        stale = factories.Resource()['id']
        stale_time = datetime.datetime.utcnow() - datetime.timedelta(hours=30)
        results.upsert(stale, True, last_checked=stale_time)

        # Already marked pending, three hours ago.
        stuck_pending = factories.Resource()['id']
        pending_time = datetime.datetime.utcnow() - datetime.timedelta(hours=3)
        results._make_pending([stuck_pending], pending_time)

        results.get_resources_to_check(10)

        for resource_id in (never_checked, stale, stuck_pending):
            row = results.get(resource_id)
            assert row["pending"] is True
Ejemplo n.º 17
0
    def test_with_many_results(self):
        """all() should list every stored result in the order it was added."""
        expected_ids = ["test_resource_1", "test_resource_2", "test_resource_3"]
        for resource_id, alive in zip(expected_ids, (True, False, True)):
            results.upsert(resource_id, alive)

        rows = results.all()

        assert len(rows) == 3
        for position, resource_id in enumerate(expected_ids):
            assert rows[position]["resource_id"] == resource_id
Ejemplo n.º 18
0
    def test_with_many_results(self):
        """Three upserted results should come back from all() in that order."""
        outcomes = (
            ("test_resource_1", True),
            ("test_resource_2", False),
            ("test_resource_3", True),
        )
        for resource_id, alive in outcomes:
            results.upsert(resource_id, alive)

        rows = results.all()

        assert len(rows) == 3
        for index in range(3):
            assert rows[index]["resource_id"] == outcomes[index][0]
Ejemplo n.º 19
0
    def test_incrementing_num_fails(self):
        """Three failed results in a row should leave num_fails at 3.

        ``last_checked`` is expected to fall between the timestamps taken
        around the third upsert() call.

        """
        resource_id = "test_resource_id"

        for _ in range(2):
            results.upsert(resource_id, False)
        before = datetime.datetime.utcnow()
        results.upsert(resource_id, False)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)

        assert row["num_fails"] == 3
        last_checked = strptime(row["last_checked"])
        assert before < last_checked < after
Ejemplo n.º 20
0
    def test_incrementing_num_fails(self):
        """Each consecutive bad result should add one to num_fails."""
        resource_id = "test_resource_id"

        # Two earlier failures, then a third one bracketed by timestamps.
        results.upsert(resource_id, False)
        results.upsert(resource_id, False)
        before = datetime.datetime.utcnow()
        results.upsert(resource_id, False)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)
        checked_at = strptime(row["last_checked"])

        assert row["num_fails"] == 3
        assert checked_at > before
        assert checked_at < after
Ejemplo n.º 21
0
    def test_reset_num_fails(self):
        """A successful result after failures should put num_fails back to 0.

        Both ``last_checked`` and ``last_successful`` are expected to fall
        between the timestamps taken around the successful upsert() call.

        """
        resource_id = "test_resource_id"

        for _ in range(2):
            results.upsert(resource_id, False)
        before = datetime.datetime.utcnow()
        results.upsert(resource_id, True)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)

        assert row["num_fails"] == 0
        for field in ("last_checked", "last_successful"):
            assert strptime(row[field]) > before
            assert strptime(row[field]) < after
Ejemplo n.º 22
0
    def test_reset_num_fails(self):
        """Two failures followed by a success should leave num_fails at 0."""
        resource_id = "test_resource_id"

        results.upsert(resource_id, False)
        results.upsert(resource_id, False)
        before = datetime.datetime.utcnow()
        results.upsert(resource_id, True)
        after = datetime.datetime.utcnow()

        row = results.get(resource_id)

        assert row["num_fails"] == 0
        last_checked = strptime(row["last_checked"])
        assert before < last_checked < after
        last_successful = strptime(row["last_successful"])
        assert before < last_successful < after
Ejemplo n.º 23
0
    def test_with_one_result(self):
        """all() should return the single stored result."""
        resource_id = "test_resource_id"
        results.upsert(resource_id, True)

        rows = results.all()

        assert len(rows) == 1
        assert rows[0]["resource_id"] == resource_id
Ejemplo n.º 24
0
    def test_that_it_does_not_return_resources_with_pending_checks(self):
        """Resources marked pending an hour ago should not be handed out.

        Three groups of five resources are set up: checked 20 hours ago,
        marked pending one hour ago, and last checked 35 down to 31 hours
        ago.  Asking for 10 resources should return only the last group, in
        the order the results were recorded (oldest check first).

        """
        now = datetime.datetime.utcnow()

        def hours_ago(hours):
            return now - datetime.timedelta(hours=hours)

        def make_resources(count):
            return [factories.Resource()['id'] for _ in range(count)]

        # Five resources that were checked 20 hours ago.
        recent_ids = make_resources(5)
        checked_recently = hours_ago(20)
        for resource_id in recent_ids:
            results.upsert(resource_id, True, last_checked=checked_recently)

        # Five resources whose pending check started one hour ago.
        pending_ids = make_resources(5)
        results._make_pending(pending_ids, hours_ago(1))

        # Five resources last checked 35, 34, 33, 32 and 31 hours ago.
        stale_ids = make_resources(5)
        for hours, resource_id in zip((35, 34, 33, 32, 31), stale_ids):
            results.upsert(resource_id, True, last_checked=hours_ago(hours))

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == stale_ids
Ejemplo n.º 25
0
    def test_with_some_resources_checked_recently_and_some_not_recently(self):
        """Only the stale resources are returned, oldest check first.

        Five resources get a result dated 23 hours ago and five others get
        results dated 34 down to 30 hours ago.  Asking for 10 resources should
        return the second group only, ordered so that the most recently
        checked resource comes last.

        """
        now = datetime.datetime.utcnow()

        recent_ids = [factories.Resource()['id'] for _ in range(5)]
        recently = now - datetime.timedelta(hours=23)
        for resource_id in recent_ids:
            results.upsert(resource_id, True, last_checked=recently)

        (resource_6, resource_7, resource_8, resource_9, resource_10) = [
            factories.Resource()['id'] for _ in range(5)]

        # Results are recorded in a different order from the one in which
        # the resources were created: 34, 33, 32, 31 and 30 hours ago.
        check_order = [
            resource_7, resource_6, resource_9, resource_10, resource_8
        ]
        for hours, resource_id in zip((34, 33, 32, 31, 30), check_order):
            results.upsert(resource_id,
                           True,
                           last_checked=now - datetime.timedelta(hours=hours))

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == check_order
Ejemplo n.º 26
0
    def test_with_some_resources_checked_recently_and_some_not_recently(self):
        """Stale resources come back sorted most-recently-checked last.

        Five resources checked 23 hours ago should be left out; five others,
        checked between 34 and 30 hours ago, should all be returned when 10
        resources are requested.

        """
        now = datetime.datetime.utcnow()

        def hours_ago(hours):
            return now - datetime.timedelta(hours=hours)

        fresh_ids = [factories.Resource()['id'] for _ in range(5)]
        fresh_time = hours_ago(23)
        for fresh_id in fresh_ids:
            results.upsert(fresh_id, True, last_checked=fresh_time)

        stale_ids = [factories.Resource()['id'] for _ in range(5)]
        resource_6, resource_7, resource_8, resource_9, resource_10 = stale_ids

        # The check times deliberately do not follow creation order.
        checked = (
            (resource_7, 34),
            (resource_6, 33),
            (resource_9, 32),
            (resource_10, 31),
            (resource_8, 30),
        )
        for resource_id, hours in checked:
            results.upsert(resource_id, True, last_checked=hours_ago(hours))

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == [resource_id for resource_id, _ in checked]
Ejemplo n.º 27
0
 def test_insert_result_with_unicode(self):
     """A reason with non-ASCII characters should survive upsert() and get()."""
     resource_id = "test_resource_id"
     results.upsert(resource_id, False, status=500, reason=u"Föobäß")

     stored = results.get(resource_id)
     assert stored["reason"] == u"Föobäß"
Ejemplo n.º 28
0
    def test_with_one_result(self):
        """With one result stored, all() should return just that result."""
        results.upsert("test_resource_id", True)

        stored = results.all()

        assert len(stored) == 1
        only_row = stored[0]
        assert only_row["resource_id"] == "test_resource_id"
Ejemplo n.º 29
0
 def test_insert_result_with_unicode(self):
     """Inserting a row with a non-ASCII reason should store it unchanged."""
     results.upsert(
         "test_resource_id", False, status=500, reason=u"Föobäß")

     assert results.get("test_resource_id")["reason"] == u"Föobäß"
Ejemplo n.º 30
0
    def test_that_it_does_not_return_resources_with_pending_checks(self):
        """A pending check started an hour ago should keep a resource out.

        Of the fifteen resources set up here, only the five whose last check
        is 31 to 35 hours old are expected back, oldest check first.

        """
        now = datetime.datetime.utcnow()

        # Group 1: checked 20 hours ago.
        checked_ids = [factories.Resource()['id'] for _ in range(5)]
        checked_time = now - datetime.timedelta(hours=20)
        for checked_id in checked_ids:
            results.upsert(checked_id, True, last_checked=checked_time)

        # Group 2: marked pending one hour ago.
        pending_ids = [factories.Resource()['id'] for _ in range(5)]
        pending_time = now - datetime.timedelta(hours=1)
        results._make_pending(pending_ids, pending_time)

        # Group 3: last checked 35, 34, 33, 32 and 31 hours ago, in creation
        # order.
        overdue_ids = [factories.Resource()['id'] for _ in range(5)]
        for offset, overdue_id in enumerate(overdue_ids):
            results.upsert(
                overdue_id, True,
                last_checked=now - datetime.timedelta(hours=35 - offset))

        resources_to_check = results.get_resources_to_check(10)

        assert resources_to_check == overdue_ids