Beispiel #1
0
    def check_jobs(cls):
        """
        Check any existing Lantern jobs for progress, and process any that have completed

        Active jobs which have not been updated within the last JOB_LOOKUP_DELAY_LANTERN
        seconds (default 3600) are polled:

        * if the progress request does not report "success", the job is left untouched
        * if the job is not yet at 100%, it is re-saved so that it is not polled again
          until the delay has elapsed
        * if the job is at 100%, its results are requested, each result is passed through
          LanternApi._xwalk and saved, and the job is marked "complete"

        Jobs whose owning account cannot be loaded are skipped, so that one orphaned job
        does not abort the processing of all the others.

        :return:
        """
        dao = LanternJob()

        # only look at jobs which were last touched before the cut-off
        delay = app.config.get("JOB_LOOKUP_DELAY_LANTERN", 3600)
        cutoff = dates.before_now(delay)
        gen = dao.list_active(cutoff, keepalive="30m")

        for job in gen:
            acc = MonitorUKAccount.pull(job.account)
            if acc is None:
                # without the account there is no API key to query Lantern with; skip this job
                # rather than failing the whole run with an AttributeError
                continue

            lc = client.Lantern(api_key=acc.lantern_api_key)
            prog = lc.get_progress(job.job_id)
            if prog.get("status") != "success":
                # progress lookup failed: leave the job exactly as it is, so it is retried next time
                continue

            pc = prog.get("data", {}).get("progress", 0)
            if pc != 100:
                # this will update the last_updated date, which means we won't look at it again for a while
                job.save()
                continue

            # if we get here, the job is complete so we need to retrieve it
            results = lc.get_results(job.job_id)
            if results.get("status") == "success":
                for res in results.get("data", []):
                    enhancement = LanternApi._xwalk(res)
                    enhancement.save()

            # set the job as complete
            # NOTE(review): this also happens when the results request did not report success,
            # in which case no enhancements were stored - confirm that is intended
            job.status = "complete"
            job.save()
Beispiel #2
0
    def test_05_low_quota(self):
        # Check what happens when the user has a low quota on Lantern: two records
        # qualify for lookup, but only one identifier may be sent

        global QUOTA
        QUOTA = 1

        account = MonitorUKAccount()
        account.email = "*****@*****.**"
        account.lantern_email = "*****@*****.**"
        account.lantern_api_key = "123456789"
        account.save()

        # first candidate: never looked up, and missing its publication date
        never_checked = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
        del never_checked["admin"]["lantern_lookup"]
        del never_checked["record"]["rioxxterms:publication_date"]
        PublicAPC(never_checked).save()

        # second candidate: last looked up a year ago, and missing its publication date
        stale = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
        stale["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))   # a year ago
        del stale["record"]["rioxxterms:publication_date"]
        PublicAPC(stale).save(blocking=True)

        LanternApi.make_new_jobs()

        time.sleep(2)

        # exactly one job should have been recorded locally ...
        stored_jobs = list(LanternJob().iterall())
        assert len(stored_jobs) == 1

        # ... and exactly one job should have been sent to Lantern, holding a single identifier
        assert len(CREATED_JOBS) == 1
        submitted = CREATED_JOBS[0]

        assert submitted["email"] == "*****@*****.**"
        assert len(submitted["list"]) == 1
Beispiel #3
0
    def test_17_lantern_jobs(self):
        # Exercise the LanternJob model: save, pull back, and validate the status field
        stored = LanternJob()
        stored.job_id = "123456789"
        stored.account = "abcdefg"
        stored.status = "active"
        stored.save(blocking=True)

        # the pulled copy must carry the same values that were saved
        fetched = LanternJob().pull(stored.id)
        assert (fetched.job_id, fetched.account, fetched.status) == ("123456789", "abcdefg", "active")

        # "complete" is an accepted status ...
        fetched.status = "complete"

        # ... but an arbitrary value is rejected
        with self.assertRaises(dataobj.DataSchemaException):
            fetched.status = "other"
Beispiel #4
0
    def make_new_jobs(cls):
        """
        Send new requests to Lantern, and record the jobs that are created.

        For each account which has Lantern credentials, this method:

        * goes through the PublicAPC records belonging to that account, and for each one that
          LanternApi._needs_lantern_data says could benefit from a lookup, collects its
          identifiers (if it has any) and stamps the record with the current lookup time
        * asks Lantern for the account's available quota, and trims the list of identifiers to fit
        * submits the identifiers to Lantern in batches, and stores a LanternJob with status
          "active" for every job that Lantern reports as successfully created

        Accounts with nothing to look up, or with no available quota, are skipped.

        :return:
        """
        dao = PublicAPC()

        gen = MonitorUKAccount.list_lantern_enabled(keepalive="1h")
        for acc in gen:
            gen2 = dao.list_by_owner(acc.id)
            identifiers = []
            for apc in gen2:
                if LanternApi._needs_lantern_data(apc):
                    idents = LanternApi._get_identifiers(apc)
                    if idents is not None:
                        identifiers.append(idents)
                    # NOTE(review): the record is stamped as looked-up before the quota is known, so
                    # records dropped below for lack of quota are still marked - confirm this is intended
                    apc.lantern_lookup = dates.now()
                    apc.save()

            # if there are no identifiers, no need to do any more
            if not identifiers:
                continue

            # now check the user's quota
            lc = client.Lantern(api_key=acc.lantern_api_key)
            quota = lc.get_quota(acc.lantern_email)
            available = quota.get("data", {}).get("available", 0)
            if not available or available < 0:
                # nothing left to spend.  A negative value must be caught here too, otherwise the
                # slice below would count from the end of the list and still submit identifiers
                continue

            if len(identifiers) > available:
                identifiers = identifiers[:available]

            batches = LanternApi._batch(identifiers)
            for batch in batches:
                resp = lc.create_job(acc.lantern_email, "monitor-uk", batch)
                if resp.get("status") == "success":
                    # remember the job so that its progress can be checked later
                    job_id = resp.get("data", {}).get("job")
                    lj = LanternJob()
                    lj.job_id = job_id
                    lj.account = acc.id
                    lj.status = "active"
                    lj.save()
Beispiel #5
0
    def test_19_lantern_active_jobs(self):
        # Check that active Lantern jobs can be listed, with and without a date cut-off

        def _new_job(job_id, status):
            # build (but do not save) a job owned by the shared test account
            record = LanternJob()
            record.job_id = job_id
            record.account = "abcdefg"
            record.status = status
            return record

        finished = _new_job("123456789", "complete")
        finished.save()

        older_active = _new_job("123456789", "active")
        older_active.save()

        # leave a gap so the two active jobs have clearly different last_updated dates
        time.sleep(2)

        newer_active = _new_job("987654321", "active")
        newer_active.save(blocking=True)

        dao = LanternJob()

        # with no cut-off, both active jobs come back and the completed one does not
        everything_active = list(dao.list_active())
        assert len(everything_active) == 2

        # with a cut-off one second after the older active job was updated,
        # only that older job should be listed
        just_after_oldest = dates.parse(older_active.last_updated) + timedelta(seconds=1)

        before_cutoff = list(dao.list_active(checked_before=just_after_oldest))
        assert len(before_cutoff) == 1
Beispiel #6
0
    def test_07_check_jobs(self):
        # Ensure that existing jobs are checked correctly: only sufficiently old active jobs are
        # polled, unfinished ones are touched, errored ones ignored, and finished ones processed

        acc = MonitorUKAccount()
        acc.email = "*****@*****.**"
        acc.lantern_email = "*****@*****.**"
        acc.lantern_api_key = "123456789"
        acc.save()

        def _job(job_id, status, backdated=False):
            # build (but do not save) a job for the test account, optionally with an old last_updated
            record = LanternJob()
            record.job_id = job_id
            record.account = acc.id
            record.status = status
            if backdated:
                record.last_updated = dates.format(dates.before_now(5000))
            return record

        # already complete, so never polled
        already_done = _job("111111111", "complete")
        already_done.save()

        # three old active jobs; updated=False keeps the back-dated last_updated on save
        unfinished = _job("222222222", "active", backdated=True)
        unfinished.save(updated=False)

        finished = _job("333333333", "active", backdated=True)
        finished.save(updated=False)

        errored = _job("444444444", "active", backdated=True)
        errored.save(updated=False)

        # active, but only just updated
        too_recent = _job("555555555", "active")
        too_recent.save(blocking=True)

        LanternApi.check_jobs()

        # check that the progress requests we expected were made
        assert len(PROGRESS_REQUESTS) == 3
        for polled_id in ("222222222", "333333333", "444444444"):
            assert polled_id in PROGRESS_REQUESTS

        dao = LanternJob()

        # check that the job which received an error was just ignored
        after_error = dao.pull(errored.id)
        assert after_error.last_updated == errored.last_updated
        assert after_error.status == "active"

        # check that the record which was not complete was touched
        after_touch = dao.pull(unfinished.id)
        assert after_touch.last_updated != unfinished.last_updated
        assert after_touch.status == "active"

        # check that results were requested only for one item
        assert len(RESULTS_REQUESTS) == 1
        assert "333333333" in RESULTS_REQUESTS

        # wait for a bit, so that enhancements have time to go in
        time.sleep(2)

        # check that an enhancement was registered
        enhancements = list(Enhancement().iterall())
        assert len(enhancements) == 1

        expected = LanternFixtureFactory.xwalk_result()
        assert enhancements[0].data["record"] == expected["record"]
Beispiel #7
0
    def test_04_create_job(self):
        # Check we can create jobs correctly: one account without Lantern credentials, and two
        # with credentials that each own the same mix of four records

        no_lantern = MonitorUKAccount()
        no_lantern.email = "*****@*****.**"
        no_lantern.save()

        first = MonitorUKAccount()
        first.email = "*****@*****.**"
        first.lantern_email = "*****@*****.**"
        first.lantern_api_key = "123456789"
        first.save()

        second = MonitorUKAccount()
        second.email = "*****@*****.**"
        second.lantern_email = "*****@*****.**"
        second.lantern_api_key = "987654321"
        second.save(blocking=True)

        pub_date = "rioxxterms:publication_date"
        identifier = "dc:identifier"

        def _apc(owner_id, lookup, without=()):
            # Build (but do not save) a PublicAPC for the owner.
            #   lookup: "recent" -> looked up just now, "never" -> no lookup date at all,
            #           "stale"  -> looked up a year ago
            #   without: record fields to remove from the fixture
            src = PublicAPCFixtureFactory.make_record(owner_id, None, None, None)
            if lookup == "never":
                del src["admin"]["lantern_lookup"]
            elif lookup == "recent":
                src["admin"]["lantern_lookup"] = dates.now()
            else:
                src["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))   # a year ago
            for field in without:
                del src["record"][field]
            return PublicAPC(src)

        # records for the first Lantern-enabled account:
        _apc(first.id, "recent").save()                             # does not need lantern
        _apc(first.id, "never", [pub_date]).save()                  # needs lantern: missing field
        _apc(first.id, "stale", [pub_date]).save()                  # needs lantern: timed out + missing field
        _apc(first.id, "stale", [pub_date, identifier]).save()      # needs lantern but has no identifiers

        # the same four cases for the second Lantern-enabled account; the final save blocks
        # so that everything is in place before the jobs are made
        _apc(second.id, "recent").save()
        _apc(second.id, "never", [pub_date]).save()
        _apc(second.id, "stale", [pub_date]).save()
        _apc(second.id, "stale", [pub_date, identifier]).save(blocking=True)

        LanternApi.make_new_jobs()

        time.sleep(2)

        # one job recorded locally per Lantern-enabled account
        dao = LanternJob()
        stored_jobs = list(dao.iterall())
        assert len(stored_jobs) == 2

        # one job sent to Lantern per account, each carrying two identifiers; the tally adds
        # 1 for one account and 10 for the other, so 11 means each was seen exactly once
        # NOTE(review): both branches compare against the same literal as it appears here, so
        # the elif can never match - confirm the intended addresses
        assert len(CREATED_JOBS) == 2
        tally = 0
        for submitted in CREATED_JOBS:
            if submitted["email"] == "*****@*****.**":
                tally += 1
                assert len(submitted["list"]) == 2
            elif submitted["email"] == "*****@*****.**":
                tally += 10
                assert len(submitted["list"]) == 2
        assert tally == 11

        # now do the same thing again.  The jobs should not change, as we've already created jobs
        # for all the public records
        LanternApi.make_new_jobs()
        time.sleep(2)
        stored_jobs = list(dao.iterall())
        assert len(stored_jobs) == 2