def test_02_request(self):
    # Exercise Request construction and field-by-field assignment

    # a blank Request should be constructible
    req = Request()

    # construction around the example fixture should succeed
    source = RequestFixtureFactory.example()
    req = Request(source)

    # a malformed source must be rejected at construction time
    broken = {"whatever": "broken"}
    with self.assertRaises(dataobj.DataStructureException):
        req = Request(broken)

    # building a Request field by field should round-trip each value
    req = Request()
    req.record = source.get("record")
    req.owner = "test1"
    req.action = "update"
    req.public_id = "abcdefg"

    assert req.owner == "test1"
    assert req.action == "update"
    assert req.public_id == "abcdefg"

    # assigning an invalid record must also raise
    req = Request()
    with self.assertRaises(dataobj.DataStructureException):
        req.record = {"random": "stuff"}
def test_09_remove_permanent(self):
    # Removing the APCs of the sole remaining owner should delete the public record entirely
    source = RequestFixtureFactory.example()
    req = Request(source)
    req.owner = "test"

    # build a public record whose only APC contribution belongs to "test"
    source2 = PublicAPCFixtureFactory.example()
    pub = PublicAPC(source2)
    pub.remove_apcs_by_owner("abcdefg")     # clear the fixture's pre-existing apc
    apc_record = PublicAPCFixtureFactory.apc_record()
    del apc_record["ref"]   # do this so that the ref gets created correctly later
    pub.add_apc_for_owner("test", apc_record)   # add a new, known one
    pub.save(blocking=True)

    # issue the removal and give the index time to catch up
    PublicApi.remove(req)
    time.sleep(2)

    # the public record should now be gone
    dao = PublicAPC()
    pub2 = dao.pull(pub.id)
    assert pub2 is None
def test_06_publish_update(self):
    # Publishing a Request against an existing PublicAPC should merge both apcs and metadata
    merge_source = PublicAPCFixtureFactory.record_merge_source()
    merge_target = PublicAPCFixtureFactory.record_merge_target()
    apc_record = PublicAPCFixtureFactory.apc_record()
    result = PublicAPCFixtureFactory.record_merge_result()

    # strip the apc portions; they are managed explicitly below
    for rec in (merge_source, merge_target, result):
        del rec["jm:apc"]

    # three distinguishable apc contributions, with refs removed so they regenerate
    first = deepcopy(apc_record)
    second = deepcopy(apc_record)
    third = deepcopy(apc_record)
    first["organisation_name"] = "First"
    second["organisation_name"] = "Second"
    third["organisation_name"] = "Third"
    for rec in (first, second, third):
        del rec["ref"]

    # the incoming request from owner 11111 carries "First"
    req = Request()
    req.record = merge_source
    req.add_apc_record(first)
    req.owner = "11111"

    # the existing public record holds "Second" (22222) and "Third" (11111)
    pub = PublicAPC()
    pub.record = merge_target
    pub.add_apc_for_owner("22222", second)
    pub.add_apc_for_owner("11111", third)
    pub.save(blocking=True)

    PublicApi.publish(req)

    dao = PublicAPC()
    pub2 = dao.pull(pub.id)

    # owner 11111's contribution should have been replaced by "First";
    # owner 22222's should be untouched
    one = pub2.get_apcs_by_owner("11111")
    two = pub2.get_apcs_by_owner("22222")
    assert len(one) == 1
    assert len(two) == 1
    assert one[0]["organisation_name"] == "First"
    assert two[0]["organisation_name"] == "Second"

    # the bibliographic metadata should have merged to the expected result
    record = pub2.record
    del record["jm:apc"]
    assert record == result
def process_requests(cls): """ Go through any new requests (since this method last ran) and process them. This will apply the creates, updates and deletes to the public space :return: """ # first, pick up our current state from storage workflow_dao = WorkflowState() wfs = workflow_dao.pull("requests") # if we don't have a current state, make one if wfs is None: wfs = WorkflowState() wfs.id = "requests" # get the oldest page of requests and process them dao = Request() requests = dao.list_all_since(wfs.last_request) # produces a generator for r in requests: try: # if the request was created at the time of the last request processed, it is possible it arrived # before or after the cut-off. As we don't have any more than second-level granularity in the timing, # we also need to check to see whether it was one of the ids processed during that second if r.created_date == wfs.last_request and wfs.is_processed(r.id): # if it was created at that time, and it was one of the ones processed, we can skip it continue # if the request is from a later time, or was not processed during the last run, then do the usual # processing if r.action == "update": PublicApi.publish(r) elif r.action == "delete": PublicApi.remove(r) # now, revisit the timing of this request. If the time is the same as the last request date, this is a # request which came in during that same second, but was not processed at the time because it was at the # wrong end of the second. In that case, we just need to add the id to the list of records from that second # which have now been processed if r.created_date == wfs.last_request: wfs.add_processed(r.id) else: # otherwise, this is a whole new second, and we can forget everything that went before and start afresh. wfs.last_request = r.created_date wfs.already_processed = [r.id] except: wfs.save(blocking=True) raise wfs.save(blocking=True)
def find_request_by_identifier(cls, type, id, owner):
    """
    Find the most recent request with the given identifier, within the scope of a given owner

    :param type: the type of identifier (e.g. doi, pmcid, url, etc)
    :param id: the identifier
    :param owner: the owner of the request
    :return: The most recent request owned by that owner with that identifier, or None if there is no match
    """
    dao = Request()
    results = dao.find_by_identifier(type, id, owner)
    # results are ordered by date (newest first), so the head of the list is
    # the latest; use truthiness rather than an explicit length check
    return results[0] if results else None
def test_10_request_refs(self):
    # Check that APC refs are handled correctly by Requests

    # refs should be stripped automatically when constructing from a source
    source = RequestFixtureFactory.example()
    source["record"]["jm:apc"][0]["ref"] = "1234567890"
    req = Request(source)
    assert "ref" not in req.apc_records[0]

    # refs should also be stripped when the record is assigned explicitly
    source = RequestFixtureFactory.example()
    record = source.get("record")
    record["jm:apc"][0]["ref"] = "123456789"
    req = Request()
    req.record = record
    assert "ref" not in req.apc_records[0]
def test_07_save_delete(self):
    # Work through a cycle of saves and deletes to observe the outputs
    source = RequestFixtureFactory.record()

    acc = MonitorUKAccount()
    acc.save(blocking=True)

    req = ApiRequest(source, account=acc)
    req.save()

    # the save should have created an "update" Request owned by the account
    dao = Request()
    req2 = dao.pull(req.request.id)
    assert req2 is not None
    assert req2.owner == acc.id
    assert req2.record == source
    assert req2.action == "update"

    # now publish the request
    PublicApi.publish(req2)
    time.sleep(2)

    # pull the object as identified by its API identifier (which should be the DOI),
    # then issue an update against it
    source2 = deepcopy(source)
    source2["dc:title"] = "An update"
    # NOTE: renamed from "next" to avoid shadowing the builtin
    next_req = ApiRequest.pull(req.id, account=acc)
    next_req.update(source2)
    next_req.save()

    # now, at this point we should have 2 request objects in the index.  One for the
    # original save, and one for the new save
    req3 = dao.pull(next_req.request.id)
    assert req3 is not None
    assert req3.owner == acc.id
    assert req3.record == source2
    assert req3.action == "update"

    # now issue a delete on the same record
    next_req.delete()

    # by now we should have 3 request objects in the index, 2 for the above updates
    # and one for the delete request
    req4 = dao.pull(next_req.request.id)
    assert req4 is not None
    assert req4.owner == acc.id
    assert req4.record == source2
    assert req4.action == "delete"
def test_07_separate_records(self):
    # Separating a Request from a PublicAPC strips all of that owner's APCs
    apc_record = PublicAPCFixtureFactory.apc_record()

    req = Request()
    req.owner = "test"

    # a public record carrying two apc contributions for the same owner
    pub = PublicAPC()
    pub.add_apc_for_owner("test", apc_record)
    pub.add_apc_for_owner("test", apc_record)
    assert len(pub.apc_records) == 2

    PublicApi.separate_records(req, pub)
    assert not pub.has_apcs()
def delete(cls, record, account, public_id=None):
    """
    Record a "delete" request, with the associated (minimal) record data, requested by the given account

    Delete requests need only come with enough record data to identify the public record

    :param record: The minimal record data for identifying the record
    :param account: the account to carry out the request on behalf of
    :param public_id: the public id of a public record for which this is a delete request
    :return: the Request object that was created
    :raises RequestAPIException: if record or account is None
    """
    if record is None:
        raise RequestAPIException("You can't call 'delete' with a NoneType record argument")
    if account is None:
        raise RequestAPIException("You can't call 'delete' with a NoneType account argument")

    req = Request()
    req.record = record
    req.owner = account.id
    req.action = "delete"
    # the public id is optional: it is only present when the caller already
    # knows which public record this delete applies to
    if public_id is not None:
        req.public_id = public_id

    req.save()
    return req
def test_02_find_public_record(self):
    # Find a public record with a variety of identifiers
    source = PublicAPCFixtureFactory.example()
    pub = PublicAPC(source)
    pub.save(blocking=True)

    # document to form the basis of the queries
    source2 = RequestFixtureFactory.example()

    # lookup by public id (no dc:identifier present on the request)
    pid = deepcopy(source2)
    del pid["record"]["dc:identifier"]
    req = Request(pid)
    req.public_id = pub.id
    assert PublicApi.find_public_record(req) is not None

    # one of each kind of identifier, looked up via both find_public_record
    # and find_public_record_by_identifier
    cases = [
        ("doi", "10.1234/me"),
        ("pmid", "87654321"),
        ("pmcid", "PMC1234"),
        ("url", "http://example.com/whatever"),
    ]
    for ident_type, ident in cases:
        query = deepcopy(source2)
        query["record"]["dc:identifier"] = [{"type": ident_type, "id": ident}]
        req = Request(query)
        assert PublicApi.find_public_record(req) is not None
        assert PublicApi.find_public_record_by_identifier(ident_type, ident) is not None

    # finally, ensure that you don't get a match when you shouldn't
    null = deepcopy(source2)
    null["record"]["dc:identifier"] = [{"type": "doi", "id": "10.1234/another"}]
    req = Request(null)
    assert PublicApi.find_public_record(req) is None
    assert PublicApi.find_public_record_by_identifier("doi", "10.1234/another") is None
def test_07_request2public(self):
    # Converting a Request to a PublicAPC should carry the record across and mint refs
    source = RequestFixtureFactory.example()
    req = Request(source)

    pub = req.make_public_apc()
    assert pub is not None
    assert pub.record is not None
    assert len(pub.apc_records) == 1

    # every apc record should have been given a ref
    minted = [apc.get("ref") for apc in pub.apc_records]
    for ref in minted:
        assert ref is not None
    assert len(minted) == 1

    # the refs registered against the owner must match the ones on the records
    refs = pub.get_apc_refs(req.owner)
    assert len(refs) == 1
    assert refs[0] == minted[0]
def test_03_pull_request(self):
    # Request objects are not retrievable through the ApiRequest pull interface
    acc = MonitorUKAccount()
    acc.id = "abcdefghij"
    acc.save(blocking=True)

    # first make a request which contains that doi
    req_source = RequestFixtureFactory.example()
    req = Request(req_source)
    req.save(blocking=True)

    # you can't pull a request object, so just show that that's true...

    # pulling by doi should yield nothing
    assert ApiRequest.pull("10.1234/me", account=acc) is None

    # pulling by the request's own id should also yield nothing
    assert ApiRequest.pull(req.id, account=acc) is None
def test_08_remove_separate(self):
    # Removing one owner's contribution should leave the other owner's APCs intact
    source = RequestFixtureFactory.example()
    req = Request(source)
    req.owner = "test"

    # create a record with 2 distinct apcs from different owners
    source2 = PublicAPCFixtureFactory.example()
    apc_record = PublicAPCFixtureFactory.apc_record()
    del apc_record["ref"]   # do this so that the ref gets created correctly later
    pub = PublicAPC(source2)
    pub.add_apc_for_owner("test", apc_record)
    pub.save(blocking=True)

    # request removal of "test"'s contribution
    PublicApi.remove(req)
    time.sleep(2)

    # "test" should be gone; the fixture's original owner should remain
    dao = PublicAPC()
    pub2 = dao.pull(pub.id)
    assert len(pub2.get_apcs_by_owner("test")) == 0
    assert len(pub2.get_apcs_by_owner("abcdefg")) == 1
def test_13_request_iterator(self):
    # Iterating lists of Requests should page correctly and yield oldest-first
    for s in RequestFixtureFactory.request_per_day("2001-01", 10):
        Request(s).save()
    time.sleep(2)

    # set the page size small, to ensure the iterator has to work
    dao = Request()
    gen = dao.list_all_since("2001-01-01T00:00:00Z", page_size=5)
    results = list(gen)
    assert len(results) == 10

    # the created dates must already be in ascending (i.e. oldest first) order;
    # comparing against a sorted copy proves the iterator's ordering
    dates = [r.created_date for r in results]
    assert dates == sorted(dates)
def update(cls, record, account, public_id=None):
    """
    Record an "update" request, with the associated record data, requested by the given account

    :param record: The raw dict data (e.g. coming from the web API) which makes up the body of the request
    :param account: the account to carry out the request on behalf of
    :param public_id: The public id of a public record for which this is an update
    :return: the Request object that was created
    """
    # both the record and the account are mandatory
    if record is None:
        raise RequestAPIException("You can't call 'update' with a NoneType record argument")
    if account is None:
        raise RequestAPIException("You can't call 'update' with a NoneType account argument")

    req = Request()
    req.owner = account.id
    req.action = "update"
    req.record = record
    # the public id is optional, and only set when supplied
    if public_id is not None:
        req.public_id = public_id

    req.save()
    return req
def test_10_find_request(self):
    # Find a Request through a number of routes, confirming the latest one wins
    source = RequestFixtureFactory.example()
    req = Request(source)
    req.save(blocking=True)
    time.sleep(2)

    # a second, later request with the same identifiers
    source = RequestFixtureFactory.example()
    req1 = Request(source)
    req1.save(blocking=True)

    # each identifier type should resolve to the most recent request
    # (the unused intermediate "query document" local from the original has been removed)
    lookups = [
        ("doi", "10.1234/me"),
        ("pmid", "87654321"),
        ("pmcid", "PMC1234"),
        ("url", "http://example.com/whatever"),
    ]
    for ident_type, ident in lookups:
        result = RequestApi.find_request_by_identifier(ident_type, ident, "abcdefghij")
        assert result is not None
        assert result.created_date == req1.created_date

    # finally, ensure that you don't get a match when you shouldn't:
    # an unknown identifier, and the right identifier with the wrong owner
    assert RequestApi.find_request_by_identifier("doi", "10.1234/another", "abcdefghij") is None
    assert RequestApi.find_request_by_identifier("doi", "10.1234/me", "test") is None
def test_11_process_requests_cycle(self):
    # Run through the process of processing a Request into a PublicAPC:
    # create, update (new second), update (same second), delete — checking
    # the public record and the workflow state after each processing run
    source = RequestFixtureFactory.example()
    if "id" in source:
        del source["id"]

    pub_dao = PublicAPC()
    wfs_dao = WorkflowState()

    # first make a record for the first time (title removed so the later
    # update is observable)
    first = deepcopy(source)
    del first["record"]["dc:title"]

    req = Request(first)
    req.owner = "test"
    req.action = "update"
    req.save(blocking=True)

    # run the job
    WorkflowApi.process_requests()
    time.sleep(2)

    # first check that a public record was made
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") is None

    # check that the workflow state was created
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req.created_date
    assert wfs.already_processed == [req.id]

    # now run an update with a different date
    second = deepcopy(source)
    second["record"]["dc:title"] = "Update"
    second["created_date"] = "2002-01-01T00:00:00Z"

    req2 = Request(second)
    req2.owner = "test"
    req2.action = "update"
    req2.save(blocking=True)

    # run the job again
    WorkflowApi.process_requests()
    time.sleep(2)

    # check the public record was updated
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert pubs[0].record.get("dc:title") == "Update"

    # check that the workflow state was updated (new second: processed list resets)
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req2.created_date
    assert wfs.already_processed == [req2.id]

    # now run an update with the same date, to observe the difference in the workflow state
    third = deepcopy(source)
    third["record"]["dc:title"] = "Update 2"
    third["created_date"] = "2002-01-01T00:00:00Z"

    req3 = Request(third)
    req3.owner = "test"
    req3.action = "update"
    req3.save(blocking=True)

    # run the job again
    WorkflowApi.process_requests()
    time.sleep(2)

    # check the public record was updated
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 1
    assert (
        pubs[0].record.get("dc:title") == "Update 2"
    )  # should have been updated, as there are only apc contributions from one source

    # check that the workflow state was updated
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req3.created_date
    assert wfs.already_processed == [req2.id, req3.id]  # processed records should have been appended

    # finally issue a delete request
    fourth = deepcopy(source)
    fourth["created_date"] = "2003-01-01T00:00:00Z"

    req4 = Request(fourth)
    req4.owner = "test"
    req4.action = "delete"
    req4.save(blocking=True)

    # run the job again
    WorkflowApi.process_requests()
    time.sleep(2)

    # check the public record was deleted
    pubs = pub_dao.find_by_doi("10.1234/me")
    assert len(pubs) == 0

    # check that the workflow state was updated (new second: processed list resets)
    wfs = wfs_dao.pull("requests")
    assert wfs is not None
    assert wfs.last_request == req4.created_date
    assert wfs.already_processed == [req4.id]
def test_05_request_dao(self):
    # Check the DAO methods on the Request object
    dao = Request()

    source = RequestFixtureFactory.example()
    req = Request(source)
    req.owner = "test1"
    req.action = "update"
    req.public_id = "abcdefg"
    req.save(blocking=True)

    # the saved request can be pulled back by id
    req2 = dao.pull(req.id)
    assert req2 is not None

    # identifiers present on the record resolve for the right owner
    present = [
        ("doi", "10.1234/me"),
        ("pmcid", "PMC1234"),
        ("pmid", "87654321"),
        ("url", "http://example.com/whatever"),
    ]
    for ident_type, ident in present:
        assert len(dao.find_by_identifier(ident_type, ident, "test1")) == 1

    # identifiers not on the record resolve to nothing
    absent = [
        ("doi", "10.1234/you"),
        ("pmcid", "PMC5678"),
        ("pmid", "123456789"),
        ("url", "http://example.com/this"),
    ]
    for ident_type, ident in absent:
        assert len(dao.find_by_identifier(ident_type, ident, "test1")) == 0

    # the right identifiers with the wrong owner also resolve to nothing
    for ident_type, ident in present:
        assert len(dao.find_by_identifier(ident_type, ident, "test2")) == 0
def test_11_process_requests_exception(self):
    # What happens when the process_requests method fails partway through
    sources = RequestFixtureFactory.request_per_day("2001-01", 9)
    dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

    # we're going to construct a series of requests for each doi,
    # starting with a create, then an update, followed by a delete
    # (not that it matters, as we're going to pump them through a mock)
    for i, s in enumerate(sources):
        s["record"]["dc:identifier"] = [{"type": "doi", "id": dois[i % 3]}]  # iterate over the dois 3 times
        if i < 3:
            title, action = "Create", "update"
        elif i < 6:
            title, action = "Update", "update"
        else:
            title, action = "Delete", "delete"
        s["record"]["dc:title"] = title
        req = Request(s)
        req.action = action
        req.save()
    time.sleep(2)

    # set up the mocks
    PublicApi.publish = publish_mock
    PublicApi.remove = delete_mock

    # now run the process job; the mock dies partway through
    with self.assertRaises(TestException):
        WorkflowApi.process_requests()

    # we know this died during the 6th update request being processed,
    # so just check that the workflow state reflects that
    wfs_dao = WorkflowState()
    wfs = wfs_dao.pull("requests")
    assert wfs.last_request == "2001-01-05T00:00:00Z"
    assert len(wfs.already_processed) == 1