Example #1
0
    def test_12_workflow_state(self):
        # Exercise construction and the accessors of WorkflowState objects

        # a blank instance should be constructible
        WorkflowState()

        # build one from fixture source and verify the getters
        wfs = WorkflowState(WorkflowStateFixtureFactory.example())

        assert wfs.last_request == "2003-01-01T00:00:00Z"
        assert wfs.already_processed == ["123456789", "987654321"]

        # exercise the setters and confirm the values round-trip
        wfs.last_request = "2004-01-01T00:00:00Z"
        wfs.already_processed = ["abcdefg"]

        assert wfs.last_request == "2004-01-01T00:00:00Z"
        assert wfs.already_processed == ["abcdefg"]

        # adding a processed id should extend the list
        wfs.add_processed("qwerty")

        assert wfs.already_processed == ["abcdefg", "qwerty"]
        assert wfs.is_processed("qwerty")
        assert wfs.is_processed("abcdefg")
        assert not wfs.is_processed("random")

        # invalid source data must be rejected at construction time
        with self.assertRaises(dataobj.DataStructureException):
            WorkflowState({"junk": "data"})
Example #2
0
    def test_14_process_enhancements_exception(self):
        # Check behaviour when processing an enhancement fails part-way through

        sources = EnhancementFixtureFactory.request_per_day("2001-01", 9)

        dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

        # build one enhancement per source, cycling through the dois so each
        # doi is used three times
        for idx, source in enumerate(sources):
            source["record"]["dc:identifier"] = [{"type": "doi", "id": dois[idx % 3]}]
            Enhancement(source).save()

        time.sleep(2)

        # mock out the publish step so it raises
        PublicApi.publish = publish_mock

        # run the processing job back to the first day; the mock blows up
        with self.assertRaises(TestException):
            WorkflowApi.process_enhancements()

        time.sleep(2)

        # the job died during the 6th record being processed, so the persisted
        # workflow state should point at the last successfully completed request
        wfs = WorkflowState().pull("enhancements")
        assert wfs.last_request == "2001-01-05T00:00:00Z"
        assert len(wfs.already_processed) == 1
Example #3
0
    def test_11_process_requests_exception(self):
        # Check behaviour when the process_requests method fails part-way through

        sources = RequestFixtureFactory.request_per_day("2001-01", 9)

        dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

        # build one request per source, cycling the dois: three titled
        # "Create", three "Update", three "Delete"
        # (the titles don't matter - everything goes through a mock anyway)
        for idx, source in enumerate(sources):
            source["record"]["dc:identifier"] = [{"type": "doi", "id": dois[idx % 3]}]
            if idx < 3:
                source["record"]["dc:title"] = "Create"
                action = "update"
            elif idx < 6:
                source["record"]["dc:title"] = "Update"
                action = "update"
            else:
                source["record"]["dc:title"] = "Delete"
                action = "delete"
            req = Request(source)
            req.action = action
            req.save()

        time.sleep(2)

        # mock out the publish and remove steps
        PublicApi.publish = publish_mock
        PublicApi.remove = delete_mock

        # run the processing job back to the first day; the mock blows up
        with self.assertRaises(TestException):
            WorkflowApi.process_requests()

        # the job died during the 6th update request being processed, so the
        # workflow state should point at the last successfully completed request
        wfs = WorkflowState().pull("requests")
        assert wfs.last_request == "2001-01-05T00:00:00Z"
        assert len(wfs.already_processed) == 1
Example #4
0
    def process_enhancements(cls):
        """
        Go through any new Enhancements (since this method last ran) and process them.

        The current workflow state is pulled from storage (or created on first
        run), each Enhancement created since the recorded cut-off is published
        via the PublicApi, and the state is persisted again - even if a
        publish dies part-way through - so the next run resumes from the last
        successfully processed record.

        :return: None
        """
        # first, pick up our current state from storage
        workflow_dao = WorkflowState()
        wfs = workflow_dao.pull("enhancements")

        # if we don't have a current state, make one
        if wfs is None:
            wfs = WorkflowState()
            wfs.id = "enhancements"

        # all enhancements created since the last recorded request time
        dao = Enhancement()
        enhancements = dao.list_all_since(wfs.last_request)     # produces a generator

        # NOTE: the original wrapped each iteration in a bare `except:` that
        # saved state and re-raised, and then saved again on success.  A single
        # try/finally persists the state exactly once on every exit path
        # (including a failure in the generator itself) while still letting
        # any exception propagate to the caller.
        try:
            for e in enhancements:
                # if the record was created at the time of the last request
                # processed, it may have arrived before or after the cut-off.
                # We only have second-level granularity, so also check whether
                # its id was already processed during that second
                if e.created_date == wfs.last_request and wfs.is_processed(e.id):
                    # created in that second and already handled - skip it
                    continue

                # a later record, or one not processed during the last run:
                # just publish the data and let the merge handle it
                PublicApi.publish(e)

                if e.created_date == wfs.last_request:
                    # same second as the last request date: this one arrived at
                    # the wrong end of the second last time, so just record its
                    # id as processed within that second
                    wfs.add_processed(e.id)
                else:
                    # a whole new second: forget everything that went before
                    # and start afresh
                    wfs.last_request = e.created_date
                    wfs.already_processed = [e.id]
        finally:
            # persist the state whether we completed or died mid-run, so the
            # next invocation picks up from the right place
            wfs.save(blocking=True)
Example #5
0
    def test_13_process_ehnancements_cycle(self):
        # Run an enhancement through the full processing cycle

        source = EnhancementFixtureFactory.example()
        source.pop("id", None)

        pub_dao = PublicAPC()
        wfs_dao = WorkflowState()

        # start with a public record which is missing its title
        first = PublicAPCFixtureFactory.example()
        del first["record"]["dc:title"]
        pub = PublicAPC(first)
        pub.save(blocking=True)

        # create an enhancement which supplies a title for that record
        second = deepcopy(source)
        second["record"]["dc:title"] = "Update"
        second["created_date"] = "2002-01-01T00:00:00Z"
        en = Enhancement(second)
        en.public_id = pub.id
        en.save(blocking=True)

        # run the job
        WorkflowApi.process_enhancements()

        time.sleep(2)

        # the workflow state should have been created by the run
        wfs = wfs_dao.pull("enhancements")
        assert wfs is not None
        assert wfs.last_request == en.created_date
        assert wfs.already_processed == [en.id]

        # and the public record should have picked up the title
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") == "Update"

        # a second enhancement with the SAME date, to observe how the
        # workflow state accumulates within a single second
        third = deepcopy(source)
        third["record"]["dc:title"] = "Update 2"
        third["created_date"] = "2002-01-01T00:00:00Z"
        en2 = Enhancement(third)
        en2.public_id = pub.id
        en2.save(blocking=True)

        # run the job again
        WorkflowApi.process_enhancements()

        time.sleep(2)

        # the title should NOT have changed - the data was already present
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") == "Update"

        # the processed ids for the same second should have been appended
        wfs = wfs_dao.pull("enhancements")
        assert wfs is not None
        assert wfs.last_request == en2.created_date
        assert wfs.already_processed == [en.id, en2.id]
Example #6
0
    def test_11_process_requests_cycle(self):
        # Run a Request through the full processing cycle into a PublicAPC

        source = RequestFixtureFactory.example()
        source.pop("id", None)

        pub_dao = PublicAPC()
        wfs_dao = WorkflowState()

        # create the record for the first time, without a title
        first = deepcopy(source)
        del first["record"]["dc:title"]
        req = Request(first)
        req.owner = "test"
        req.action = "update"
        req.save(blocking=True)

        # run the job
        WorkflowApi.process_requests()

        time.sleep(2)

        # a public record should now exist, still without a title
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") is None

        # and the workflow state should have been created
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req.created_date
        assert wfs.already_processed == [req.id]

        # issue an update with a later date
        second = deepcopy(source)
        second["record"]["dc:title"] = "Update"
        second["created_date"] = "2002-01-01T00:00:00Z"
        req2 = Request(second)
        req2.owner = "test"
        req2.action = "update"
        req2.save(blocking=True)

        # run the job again
        WorkflowApi.process_requests()

        time.sleep(2)

        # the public record should have picked up the new title
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") == "Update"

        # and the workflow state should have moved on to the new date
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req2.created_date
        assert wfs.already_processed == [req2.id]

        # issue another update with the SAME date, to observe how the
        # workflow state accumulates within a single second
        third = deepcopy(source)
        third["record"]["dc:title"] = "Update 2"
        third["created_date"] = "2002-01-01T00:00:00Z"
        req3 = Request(third)
        req3.owner = "test"
        req3.action = "update"
        req3.save(blocking=True)

        # run the job again
        WorkflowApi.process_requests()

        time.sleep(2)

        # updated again - there are only apc contributions from one source
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") == "Update 2"

        # processed ids for the same second should have been appended
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req3.created_date
        assert wfs.already_processed == [req2.id, req3.id]

        # finally issue a delete request at a later date
        fourth = deepcopy(source)
        fourth["created_date"] = "2003-01-01T00:00:00Z"
        req4 = Request(fourth)
        req4.owner = "test"
        req4.action = "delete"
        req4.save(blocking=True)

        # run the job one last time
        WorkflowApi.process_requests()

        time.sleep(2)

        # the public record should now be gone
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 0

        # and the workflow state should reflect the delete request
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req4.created_date
        assert wfs.already_processed == [req4.id]