def test_02_request(self):
        # Exercise construction of a Request and assignment of its properties

        # an empty Request can be created with no arguments
        Request()

        # so can one wrapped around the standard fixture
        fixture = RequestFixtureFactory.example()
        Request(fixture)

        # a source document with an unrecognised structure is rejected on construction
        malformed = {"whatever" : "broken"}
        with self.assertRaises(dataobj.DataStructureException):
            Request(malformed)

        # assemble a Request one property at a time, then read the values back
        assembled = Request()
        assembled.record = fixture.get("record")
        assembled.owner = "test1"
        assembled.action = "update"
        assembled.public_id = "abcdefg"

        assert assembled.owner == "test1"
        assert assembled.action == "update"
        assert assembled.public_id == "abcdefg"

        # assigning a record with an unrecognised structure is rejected as well
        blank = Request()
        with self.assertRaises(dataobj.DataStructureException):
            blank.record = {"random" : "stuff"}
Exemple #2
0
    def test_09_remove_permanent(self):
        # Remove the requesting owner's APCs from a PublicAPC where that owner is the only one;
        # with no owners left, the public record is expected to be deleted

        # the incoming request, owned by "test"
        incoming = Request(RequestFixtureFactory.example())
        incoming.owner = "test"

        # build a public record whose only apc belongs to "test": drop the apc that
        # comes with the fixture (owned by "abcdefg") and add a known one in its place
        public = PublicAPC(PublicAPCFixtureFactory.example())
        public.remove_apcs_by_owner("abcdefg")

        known_apc = PublicAPCFixtureFactory.apc_record()
        del known_apc["ref"]  # do this so that the ref gets created correctly later
        public.add_apc_for_owner("test", known_apc)

        public.save(blocking=True)

        # issue the removal, then wait before looking the record up again
        PublicApi.remove(incoming)
        time.sleep(2)

        # the public record should no longer be retrievable
        assert PublicAPC().pull(public.id) is None
Exemple #3
0
    def test_06_publish_update(self):
        # Publish a Request which updates a PublicAPC that is already stored

        # metadata fixtures: the incoming record, the stored record, and the expected merge
        incoming_md = PublicAPCFixtureFactory.record_merge_source()
        stored_md = PublicAPCFixtureFactory.record_merge_target()
        apc_template = PublicAPCFixtureFactory.apc_record()
        expected_md = PublicAPCFixtureFactory.record_merge_result()

        # the apc entries are supplied explicitly below, so strip them from the metadata
        for metadata in (incoming_md, stored_md, expected_md):
            del metadata["jm:apc"]

        # three apc records, told apart by organisation name, none of them carrying a ref
        apcs = {}
        for label in ("First", "Second", "Third"):
            apc = deepcopy(apc_template)
            apc["organisation_name"] = label
            del apc["ref"]
            apcs[label] = apc

        # the incoming request from owner 11111 carries the "First" apc
        req = Request()
        req.record = incoming_md
        req.add_apc_record(apcs["First"])
        req.owner = "11111"

        # the stored public record has an apc from each of two owners
        stored = PublicAPC()
        stored.record = stored_md
        stored.add_apc_for_owner("22222", apcs["Second"])
        stored.add_apc_for_owner("11111", apcs["Third"])
        stored.save(blocking=True)

        PublicApi.publish(req)

        reloaded = PublicAPC().pull(stored.id)

        # the publishing owner's apc should now be the one from the request, while
        # the other owner's apc is unchanged
        from_publisher = reloaded.get_apcs_by_owner("11111")
        from_other = reloaded.get_apcs_by_owner("22222")

        assert len(from_publisher) == 1
        assert len(from_other) == 1
        assert from_publisher[0]["organisation_name"] == "First"
        assert from_other[0]["organisation_name"] == "Second"

        # with the apc section removed, the metadata should equal the expected merge result
        merged_md = reloaded.record
        del merged_md["jm:apc"]
        assert merged_md == expected_md
Exemple #4
0
    def process_requests(cls):
        """
        Go through any new requests (since this method last ran) and process them.  This will apply the creates,
        updates and deletes to the public space

        The "requests" WorkflowState holds the created date of the last request processed and the ids of the
        requests processed at that date.  It is saved when the run ends, whether the run completed or was cut
        short by an error (including an error raised while listing the requests), so the next run can pick up
        after the last request that was fully processed.  Any such error is propagated to the caller.

        :return: None
        """
        # first, pick up our current state from storage
        workflow_dao = WorkflowState()
        wfs = workflow_dao.pull("requests")

        # if we don't have a current state, make one
        if wfs is None:
            wfs = WorkflowState()
            wfs.id = "requests"

        # list the requests created since the last one we processed
        dao = Request()
        requests = dao.list_all_since(wfs.last_request)     # produces a generator

        # the try wraps the whole iteration (not just the body) so that the state is also persisted if
        # the generator itself fails part-way through, rather than only when a request handler fails
        try:
            for r in requests:
                # if the request was created at the time of the last request processed, it is possible it arrived
                # before or after the cut-off.  As we don't have any more than second-level granularity in the timing,
                # we also need to check to see whether it was one of the ids processed during that second
                if r.created_date == wfs.last_request and wfs.is_processed(r.id):
                    # if it was created at that time, and it was one of the ones processed, we can skip it
                    continue

                # if the request is from a later time, or was not processed during the last run, then do the usual
                # processing
                if r.action == "update":
                    PublicApi.publish(r)
                elif r.action == "delete":
                    PublicApi.remove(r)

                # now, revisit the timing of this request.  If the time is the same as the last request date, this is a
                # request which came in during that same second, but was not processed at the time because it was at the
                # wrong end of the second.  In that case, we just need to add the id to the list of records from that second
                # which have now been processed
                #
                # note that the state is only touched after the request has been handled, so a request whose
                # handling fails is not recorded as processed
                if r.created_date == wfs.last_request:
                    wfs.add_processed(r.id)
                else:
                    # otherwise, this is a whole new second, and we can forget everything that went before and start afresh.
                    wfs.last_request = r.created_date
                    wfs.already_processed = [r.id]
        finally:
            # always record how far we got, on success and on failure alike
            wfs.save(blocking=True)
Exemple #5
0
    def find_request_by_identifier(cls, type, id, owner):
        """
        Look up the latest request carrying a given identifier, restricted to a single owner

        :param type: the kind of identifier to search on (e.g. doi, pmcid, url, etc)
        :param id: the identifier value
        :param owner: the owner whose requests are searched
        :return: the most recent matching request owned by that owner, or None if there is no match
        """
        matches = Request().find_by_identifier(type, id, owner)
        # matches come back ordered by date, so the first entry is the latest one
        return matches[0] if len(matches) > 0 else None
    def test_10_request_refs(self):
        # Check that APC refs are handled correctly by Requests

        # a ref present in the source document is dropped when the Request is constructed
        doc = RequestFixtureFactory.example()
        doc["record"]["jm:apc"][0]["ref"] = "1234567890"
        constructed = Request(doc)
        assert "ref" not in constructed.apc_records[0]

        # the same applies when the record is assigned to an already-created Request
        rec = RequestFixtureFactory.example().get("record")
        rec["jm:apc"][0]["ref"] = "123456789"
        assigned = Request()
        assigned.record = rec
        assert "ref" not in assigned.apc_records[0]
Exemple #7
0
    def test_07_save_delete(self):
        # Work through a cycle of saves and deletes via ApiRequest, checking the Request
        # object that each step leaves behind

        record = RequestFixtureFactory.record()
        account = MonitorUKAccount()
        account.save(blocking=True)

        created = ApiRequest(record, account=account)
        created.save()

        dao = Request()

        def stored_request(request_id, expected_record, expected_action):
            # pull the Request with the given id and check its owner, record and action
            found = dao.pull(request_id)
            assert found is not None
            assert found.owner == account.id
            assert found.record == expected_record
            assert found.action == expected_action
            return found

        # the initial save should have been recorded as an update request
        initial = stored_request(created.request.id, record, "update")

        # now publish the request
        PublicApi.publish(initial)
        time.sleep(2)

        # now pull the object as identified by its API identifier (which should be the DOI),
        # and save a modified version of the record through it
        revised = deepcopy(record)
        revised["dc:title"] = "An update"
        api_request = ApiRequest.pull(created.id, account=account)
        api_request.update(revised)
        api_request.save()

        # at this point there should be 2 request objects in the index: one for the
        # original save, and one for the new save
        stored_request(api_request.request.id, revised, "update")

        # now issue a delete on the same record
        api_request.delete()

        # by now there should be 3 request objects in the index: 2 for the updates
        # above and one for the delete request
        stored_request(api_request.request.id, revised, "delete")
Exemple #8
0
    def test_07_separate_records(self):
        # Strip the requesting owner's APC records out of a PublicAPC

        apc = PublicAPCFixtureFactory.apc_record()

        incoming = Request()
        incoming.owner = "test"

        # give the public record two apc entries, both belonging to the requesting owner
        public = PublicAPC()
        for _ in range(2):
            public.add_apc_for_owner("test", apc)

        assert len(public.apc_records) == 2

        PublicApi.separate_records(incoming, public)

        # nothing should be left once that owner's entries are gone
        assert not public.has_apcs()
Exemple #9
0
    def delete(cls, record, account, public_id=None):
        """
        Store a new "delete" request on behalf of the given account

        A delete request only needs to carry enough record data to identify the public record

        :param record: the minimal record data which identifies the record
        :param account: the account making the request; its id becomes the owner of the request
        :param public_id: optionally, the public id of the public record this delete request is aimed at
        :return: the Request object that was created and saved
        :raises RequestAPIException: if record or account is None
        """
        if record is None:
            raise RequestAPIException("You can't call 'delete' with a NoneType record argument")
        if account is None:
            raise RequestAPIException("You can't call 'delete' with a NoneType account argument")

        request = Request()
        request.record = record
        request.owner = account.id
        request.action = "delete"

        # the public id is optional, so only set it when one was supplied
        if public_id is not None:
            request.public_id = public_id

        request.save()
        return request
Exemple #10
0
    def test_02_find_public_record(self):
        # Locate a stored public record by its public id and by each kind of identifier

        stored = PublicAPC(PublicAPCFixtureFactory.example())
        stored.save(blocking=True)

        # request document which each of the lookups below adapts
        base = RequestFixtureFactory.example()

        # with the identifiers removed, the public id set on the request should find the record
        doc = deepcopy(base)
        del doc["record"]["dc:identifier"]
        req = Request(doc)
        req.public_id = stored.id
        assert PublicApi.find_public_record(req) is not None

        # one identifier at a time: both the request-based lookup and the direct
        # identifier lookup should find the record
        known_identifiers = [
            ("doi", "10.1234/me"),
            ("pmid", "87654321"),
            ("pmcid", "PMC1234"),
            ("url", "http://example.com/whatever"),
        ]
        for id_type, id_value in known_identifiers:
            doc = deepcopy(base)
            doc["record"]["dc:identifier"] = [{"type": id_type, "id": id_value}]
            assert PublicApi.find_public_record(Request(doc)) is not None
            assert PublicApi.find_public_record_by_identifier(id_type, id_value) is not None

        # finally, an identifier that is not on the stored record must not match
        doc = deepcopy(base)
        doc["record"]["dc:identifier"] = [{"type": "doi", "id": "10.1234/another"}]
        assert PublicApi.find_public_record(Request(doc)) is None
        assert PublicApi.find_public_record_by_identifier("doi", "10.1234/another") is None
Exemple #11
0
    def test_07_request2public(self):
        # Convert a Request into a PublicAPC and check what comes out

        req = Request(RequestFixtureFactory.example())
        public = req.make_public_apc()

        assert public is not None
        assert public.record is not None
        assert len(public.apc_records) == 1

        # every apc record on the public object should carry a ref
        assigned_refs = []
        for apc in public.apc_records:
            ref = apc.get("ref")
            assert ref is not None
            assigned_refs.append(ref)
        assert len(assigned_refs) == 1

        # and the ref listed for the request's owner should be that same ref
        owner_refs = public.get_apc_refs(req.owner)
        assert len(owner_refs) == 1
        assert owner_refs[0] == assigned_refs[0]
Exemple #12
0
    def test_03_pull_request(self):
        # Try to pull a plain Request through the ApiRequest object

        account = MonitorUKAccount()
        account.id = "abcdefghij"
        account.save(blocking=True)

        # store a Request built directly from the fixture
        stored = Request(RequestFixtureFactory.example())
        stored.save(blocking=True)

        # ApiRequest.pull is not expected to hand back a request object, whichever key is used

        # by doi
        assert ApiRequest.pull("10.1234/me", account=account) is None

        # by the id of the request itself
        assert ApiRequest.pull(stored.id, account=account) is None
Exemple #13
0
    def test_08_remove_separate(self):
        # Remove the requesting owner's APCs from a PublicAPC that also has another owner,
        # so that the record survives with only the other owner's APC

        incoming = Request(RequestFixtureFactory.example())
        incoming.owner = "test"

        # the fixture's own apc belongs to "abcdefg"; add a second one for "test" so the
        # public record has apcs from two different owners
        public_source = PublicAPCFixtureFactory.example()
        extra_apc = PublicAPCFixtureFactory.apc_record()
        del extra_apc["ref"]  # do this so that the ref gets created correctly later
        public = PublicAPC(public_source)
        public.add_apc_for_owner("test", extra_apc)
        public.save(blocking=True)

        # issue the removal, then wait before looking the record up again
        PublicApi.remove(incoming)
        time.sleep(2)

        remaining = PublicAPC().pull(public.id)

        # only the requesting owner's apc should have gone
        assert len(remaining.get_apcs_by_owner("test")) == 0
        assert len(remaining.get_apcs_by_owner("abcdefg")) == 1
Exemple #14
0
    def test_13_request_iterator(self):
        # Check that listing Requests yields all of them, oldest first

        for source in RequestFixtureFactory.request_per_day("2001-01", 10):
            Request(source).save()

        time.sleep(2)

        # a page size smaller than the number of requests forces the iterator to page
        listing = Request().list_all_since("2001-01-01T00:00:00Z", page_size=5)
        found = list(listing)

        assert len(found) == 10

        # the created dates must already be in ascending order (i.e. oldest first),
        # which is what shows the results came out in the right order
        created = [r.created_date for r in found]
        assert created == sorted(created)
Exemple #15
0
    def update(cls, record, account, public_id=None):
        """
        Store a new "update" request, carrying the supplied record data, on behalf of the given account

        :param record: the raw dict data (e.g. coming from the web API) forming the body of the request
        :param account: the account making the request; its id becomes the owner of the request
        :param public_id: optionally, the public id of the public record which this request updates
        :return: the Request object that was created and saved
        :raises RequestAPIException: if record or account is None
        """
        if record is None:
            raise RequestAPIException("You can't call 'update' with a NoneType record argument")
        if account is None:
            raise RequestAPIException("You can't call 'update' with a NoneType account argument")

        request = Request()
        request.record = record
        request.owner = account.id
        request.action = "update"

        # the public id is optional, so only set it when one was supplied
        if public_id is not None:
            request.public_id = public_id

        request.save()
        return request
Exemple #16
0
    def test_10_find_request(self):
        # Find a Request by each kind of identifier, and check that the most recent one is returned

        # save two requests from the same fixture, a couple of seconds apart, so that
        # they carry the same identifiers but differ in age
        older = Request(RequestFixtureFactory.example())
        older.save(blocking=True)

        time.sleep(2)

        newer = Request(RequestFixtureFactory.example())
        newer.save(blocking=True)

        # every identifier on the fixture should lead to the more recent of the two requests
        known_identifiers = [
            ("doi", "10.1234/me"),
            ("pmid", "87654321"),
            ("pmcid", "PMC1234"),
            ("url", "http://example.com/whatever"),
        ]
        for id_type, id_value in known_identifiers:
            found = RequestApi.find_request_by_identifier(id_type, id_value, "abcdefghij")
            assert found is not None
            assert found.created_date == newer.created_date

        # finally, ensure that you don't get a match when you shouldn't:
        # an identifier that is on neither request ...
        assert RequestApi.find_request_by_identifier("doi", "10.1234/another", "abcdefghij") is None

        # ... and a known identifier, but looked up under a different owner
        assert RequestApi.find_request_by_identifier("doi", "10.1234/me", "test") is None
Exemple #17
0
    def test_11_process_requests_cycle(self):
        # Run through the process of processing a Request into a PublicAPC: create the public record,
        # update it twice (once with a new created date, once with the same created date), then delete
        # it, checking the public record and the "requests" workflow state after each run of the job

        source = RequestFixtureFactory.example()
        # drop any fixed id from the fixture (presumably so each Request below gets its own id)
        if "id" in source:
            del source["id"]

        pub_dao = PublicAPC()
        wfs_dao = WorkflowState()

        # first make a record for the first time (without a title, so the later update is visible)
        first = deepcopy(source)
        del first["record"]["dc:title"]
        req = Request(first)
        req.owner = "test"
        req.action = "update"
        req.save(blocking=True)

        # run the job
        WorkflowApi.process_requests()

        time.sleep(2)

        # first check that a public record was made
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") is None

        # check that the workflow state was created
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req.created_date
        assert wfs.already_processed == [req.id]

        # now run an update with a different date
        second = deepcopy(source)
        second["record"]["dc:title"] = "Update"
        second["created_date"] = "2002-01-01T00:00:00Z"
        req2 = Request(second)
        req2.owner = "test"
        req2.action = "update"
        req2.save(blocking=True)

        # run the job again
        WorkflowApi.process_requests()

        time.sleep(2)

        # check the public record was updated
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert pubs[0].record.get("dc:title") == "Update"

        # check that the workflow state was updated: a new date, so the processed list starts afresh
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req2.created_date
        assert wfs.already_processed == [req2.id]

        # now run an update with the same date, to observe the difference in the workflow state
        third = deepcopy(source)
        third["record"]["dc:title"] = "Update 2"
        third["created_date"] = "2002-01-01T00:00:00Z"
        req3 = Request(third)
        req3.owner = "test"
        req3.action = "update"
        req3.save(blocking=True)

        # run the job again
        WorkflowApi.process_requests()

        time.sleep(2)

        # check the public record was updated
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 1
        assert (
            pubs[0].record.get("dc:title") == "Update 2"
        )  # should have been updated, as there are only apc contributions from one source

        # check that the workflow state was updated
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req3.created_date
        assert wfs.already_processed == [req2.id, req3.id]  # same date as before, so the id should have been appended

        # finally issue a delete request
        fourth = deepcopy(source)
        fourth["created_date"] = "2003-01-01T00:00:00Z"
        req4 = Request(fourth)
        req4.owner = "test"
        req4.action = "delete"
        req4.save(blocking=True)

        # run the job again
        WorkflowApi.process_requests()

        time.sleep(2)

        # check the public record was removed
        pubs = pub_dao.find_by_doi("10.1234/me")
        assert len(pubs) == 0

        # check that the workflow state was updated
        wfs = wfs_dao.pull("requests")
        assert wfs is not None
        assert wfs.last_request == req4.created_date
        assert wfs.already_processed == [req4.id]  # a new date again, so the list holds only this request
Exemple #18
0
    def test_05_request_dao(self):
        # Exercise the DAO methods on the Request object

        dao = Request()

        stored = Request(RequestFixtureFactory.example())
        stored.owner = "test1"
        stored.action = "update"
        stored.public_id = "abcdefg"
        stored.save(blocking=True)

        assert dao.pull(stored.id) is not None

        # identifiers which are on the stored request ...
        present = [
            ("doi", "10.1234/me"),
            ("pmcid", "PMC1234"),
            ("pmid", "87654321"),
            ("url", "http://example.com/whatever"),
        ]
        # ... and identifiers which are not
        absent = [
            ("doi", "10.1234/you"),
            ("pmcid", "PMC5678"),
            ("pmid", "123456789"),
            ("url", "http://example.com/this"),
        ]

        # check successful queries for identifiers
        for id_type, id_value in present:
            assert len(dao.find_by_identifier(id_type, id_value, "test1")) == 1

        # check unsuccessful ones
        for id_type, id_value in absent:
            assert len(dao.find_by_identifier(id_type, id_value, "test1")) == 0

        # and check using the wrong owner
        for id_type, id_value in present:
            assert len(dao.find_by_identifier(id_type, id_value, "test2")) == 0
Exemple #19
0
    def test_11_process_requests_exception(self):
        # What happens when the process_requests method fails part-way through a run

        sources = RequestFixtureFactory.request_per_day("2001-01", 9)

        dois = ["10.1234/first", "10.1234/second", "10.1234/third"]

        # we're going to construct a series of requests for each doi
        # starting with a create, then an update, followed by a delete
        # (not that it matters, as we're going to pump them through a mock).
        # Each phase covers three consecutive sources: (title, action)
        phases = [("Create", "update"), ("Update", "update"), ("Delete", "delete")]

        for i, s in enumerate(sources):
            # cycle through the dois, so each phase touches every doi once
            s["record"]["dc:identifier"] = [{"type": "doi", "id": dois[i % 3]}]
            # anything beyond the sixth source falls into the final (delete) phase
            title, action = phases[min(i // 3, 2)]
            s["record"]["dc:title"] = title
            req = Request(s)
            req.action = action
            req.save()

        time.sleep(2)

        # set up the mocks
        PublicApi.publish = publish_mock
        PublicApi.remove = delete_mock

        # now run the process job back to the first day
        with self.assertRaises(TestException):
            WorkflowApi.process_requests()

        # we know this died during the 6th update request being processed,
        # so just check that the workflow state reflects that
        wfs = WorkflowState().pull("requests")
        assert wfs.last_request == "2001-01-05T00:00:00Z"
        assert len(wfs.already_processed) == 1