def test_12_workflow_state(self):
    """Exercise construction, accessors and validation of WorkflowState."""
    # an empty state object must be constructible without any source data
    WorkflowState()

    # build one from the fixture and check the values it exposes
    fixture = WorkflowStateFixtureFactory.example()
    state = WorkflowState(fixture)
    assert state.last_request == "2003-01-01T00:00:00Z"
    assert state.already_processed == ["123456789", "987654321"]

    # write through the setters and read the same values back
    state.last_request = "2004-01-01T00:00:00Z"
    state.already_processed = ["abcdefg"]
    assert state.last_request == "2004-01-01T00:00:00Z"
    assert state.already_processed == ["abcdefg"]

    # add_processed appends to the existing list
    state.add_processed("qwerty")
    assert state.already_processed == ["abcdefg", "qwerty"]

    # is_processed reports membership of that list
    for known_id in ("qwerty", "abcdefg"):
        assert state.is_processed(known_id)
    assert not state.is_processed("random")

    # source data that does not match the expected structure is rejected
    with self.assertRaises(dataobj.DataStructureException):
        WorkflowState({"junk": "data"})
def test_14_process_enhancements_exception(self):
    """Check the workflow state left behind when processing an enhancement fails."""
    sources = EnhancementFixtureFactory.request_per_day("2001-01", 9)
    dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

    # store one enhancement per source, cycling through the three dois so that
    # each doi ends up with a series of enhancements
    for position, source in enumerate(sources):
        doi = dois[position % 3]
        source["record"]["dc:identifier"] = [{"type": "doi", "id": doi}]
        Enhancement(source).save()

    # pause before running the job (presumably so the saved records are
    # available to the listing query - the saves above are not blocking)
    time.sleep(2)

    # swap in the mock publisher (defined elsewhere in the test module)
    PublicApi.publish = publish_mock

    # the job is expected to abort with the mock's exception
    with self.assertRaises(TestException):
        WorkflowApi.process_enhancements()

    time.sleep(2)

    # the state saved at the point of failure should have advanced to the
    # 5th of the month with a single id recorded against that timestamp
    state = WorkflowState().pull("enhancements")
    assert state.last_request == "2001-01-05T00:00:00Z"
    assert len(state.already_processed) == 1
def test_11_process_requests_exception(self):
    """Check the workflow state left behind when process_requests fails part-way through."""
    sources = RequestFixtureFactory.request_per_day("2001-01", 9)
    dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

    # Build a series of requests for each doi: the first three are titled
    # "Create", the next three "Update" and the last three "Delete".  Only the
    # final group uses the "delete" action; the rest are saved as "update".
    # (The exact content matters little, as everything goes through mocks.)
    for position, source in enumerate(sources):
        source["record"]["dc:identifier"] = [{"type": "doi", "id": dois[position % 3]}]

        if position < 3:
            title, action = "Create", "update"
        elif position < 6:
            title, action = "Update", "update"
        else:
            title, action = "Delete", "delete"

        source["record"]["dc:title"] = title
        request = Request(source)
        request.action = action
        request.save()

    # pause before running the job (presumably so the saved records are
    # available to the listing query - the saves above are not blocking)
    time.sleep(2)

    # swap in the mocks (defined elsewhere in the test module)
    PublicApi.publish = publish_mock
    PublicApi.remove = delete_mock

    # the job is expected to abort with the mock's exception
    with self.assertRaises(TestException):
        WorkflowApi.process_requests()

    # the state saved at the point of failure should have advanced to the
    # 5th of the month with a single id recorded against that timestamp
    state = WorkflowState().pull("requests")
    assert state.last_request == "2001-01-05T00:00:00Z"
    assert len(state.already_processed) == 1
def process_enhancements(cls):
    """
    Go through any new Enhancements (since this method last ran) and process them.

    Progress is tracked in a WorkflowState stored under the id "enhancements".  It records
    the created_date of the last enhancement processed, plus the ids of the enhancements
    already processed for that exact timestamp.  Each new enhancement is handed to
    PublicApi.publish, and the state is updated after every successful publish.

    The state is saved (blocking) exactly once when the method exits, whether it finishes
    normally or an exception is raised while fetching or publishing an enhancement.  Any
    such exception is propagated to the caller after the save.

    :return: None
    """
    # first, pick up our current state from storage
    workflow_dao = WorkflowState()
    wfs = workflow_dao.pull("enhancements")

    # if we don't have a current state, make one
    if wfs is None:
        wfs = WorkflowState()
        wfs.id = "enhancements"

    # list everything created at or after the last processed timestamp
    # (the original comment notes that this produces a generator)
    dao = Enhancement()
    enhancements = dao.list_all_since(wfs.last_request)

    try:
        for e in enhancements:
            # if the enhancement was created at the time of the last one processed, it is possible it
            # arrived before or after the cut-off.  As we don't have any more than second-level
            # granularity in the timing, we also need to check whether it was one of the ids processed
            # during that second; if so, skip it
            if e.created_date == wfs.last_request and wfs.is_processed(e.id):
                continue

            # otherwise it is from a later time, or was not processed during the last run, so do the
            # usual processing, which in this case is just to publish the data and let the merge handle it
            PublicApi.publish(e)

            if e.created_date == wfs.last_request:
                # same second as the last processed enhancement, but not handled at the time: just
                # add the id to the list of records from that second which have now been processed
                wfs.add_processed(e.id)
            else:
                # a whole new second: forget everything that went before and start afresh
                wfs.last_request = e.created_date
                wfs.already_processed = [e.id]
    finally:
        # persist how far we got on every exit path, so a failure part-way through does not
        # cause already-published enhancements to be processed again on the next run
        wfs.save(blocking=True)
def test_13_process_ehnancements_cycle(self):
    """Run enhancements through the workflow and check the public record and workflow state."""
    source = EnhancementFixtureFactory.example()
    source.pop("id", None)

    pub_dao = PublicAPC()
    wfs_dao = WorkflowState()

    # start from a public record with no title, so that an enhancement has something to add
    public_source = PublicAPCFixtureFactory.example()
    del public_source["record"]["dc:title"]
    pub = PublicAPC(public_source)
    pub.save(blocking=True)

    def enhance_and_process(title):
        # store an enhancement against the public record with the given title, then run the job
        data = deepcopy(source)
        data["record"]["dc:title"] = title
        data["created_date"] = "2002-01-01T00:00:00Z"
        enhancement = Enhancement(data)
        enhancement.public_id = pub.id
        enhancement.save(blocking=True)

        WorkflowApi.process_enhancements()
        time.sleep(2)
        return enhancement

    # first enhancement: supplies the missing title
    en = enhance_and_process("Update")

    # the workflow state should now exist and point at this enhancement
    wfs = wfs_dao.pull("enhancements")
    assert wfs is not None
    assert wfs.last_request == en.created_date
    assert wfs.already_processed == [en.id]

    # and the public record should have picked up the title
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") == "Update"

    # second enhancement with the same created date, to observe the difference in the workflow state
    en2 = enhance_and_process("Update 2")

    # the title should not have been updated, since data was already present
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") == "Update"

    # same timestamp, so the newly processed id is appended to the existing list
    wfs = wfs_dao.pull("enhancements")
    assert wfs is not None
    assert wfs.last_request == en2.created_date
    assert wfs.already_processed == [en.id, en2.id]
def test_11_process_requests_cycle(self):
    """Run a sequence of Requests through the workflow and check the resulting PublicAPC and state."""
    source = RequestFixtureFactory.example()
    source.pop("id", None)

    pub_dao = PublicAPC()
    wfs_dao = WorkflowState()

    def submit_and_process(data, action):
        # store a request owned by "test" with the given action, then run the job
        request = Request(data)
        request.owner = "test"
        request.action = action
        request.save(blocking=True)

        WorkflowApi.process_requests()
        time.sleep(2)
        return request

    # 1. a record arriving for the first time, without a title
    first = deepcopy(source)
    del first["record"]["dc:title"]
    req = submit_and_process(first, "update")

    # a public record should have been made, still without a title
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") is None

    # and the workflow state should have been created
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req.created_date
    assert wfs.already_processed == [req.id]

    # 2. an update with a different date
    second = deepcopy(source)
    second["record"]["dc:title"] = "Update"
    second["created_date"] = "2002-01-01T00:00:00Z"
    req2 = submit_and_process(second, "update")

    # the public record should have been updated
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") == "Update"

    # the state moves to the new date, holding only the new request's id
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req2.created_date
    assert wfs.already_processed == [req2.id]

    # 3. an update with the same date, to observe the difference in the workflow state
    third = deepcopy(source)
    third["record"]["dc:title"] = "Update 2"
    third["created_date"] = "2002-01-01T00:00:00Z"
    req3 = submit_and_process(third, "update")

    # the title should have been updated, as there are only apc contributions from one source
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") == "Update 2"

    # same date, so the newly processed id is appended to the existing list
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req3.created_date
    assert wfs.already_processed == [req2.id, req3.id]

    # 4. finally issue a delete request, with a later date
    fourth = deepcopy(source)
    fourth["created_date"] = "2003-01-01T00:00:00Z"
    req4 = submit_and_process(fourth, "delete")

    # the public record should be gone
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 0

    # the state moves to the new date, holding only the delete request's id
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req4.created_date
    assert wfs.already_processed == [req4.id]