def check_jobs(cls):
    """
    Poll Lantern for the progress of active jobs which are due a check, and import the
    results of any job that has finished.

    Jobs are selected with ``LanternJob.list_active`` using a cut-off computed from the
    ``JOB_LOOKUP_DELAY_LANTERN`` config value (default 3600).  For each job:

    * if the progress request does not report "success", the job is left untouched
    * if the reported progress is not 100, the job is re-saved and skipped
    * otherwise the results are requested and, if that request reports "success", each
      result is crosswalked to an enhancement and saved, and the job is marked "complete"

    :return:
    """
    job_dao = LanternJob()
    wait = app.config.get("JOB_LOOKUP_DELAY_LANTERN", 3600)
    checked_before = dates.before_now(wait)

    for job in job_dao.list_active(checked_before, keepalive="30m"):
        # each job is checked with the credentials of the account that owns it
        owner = MonitorUKAccount.pull(job.account)
        lantern = client.Lantern(api_key=owner.lantern_api_key)

        progress = lantern.get_progress(job.job_id)
        if progress.get("status") != "success":
            # the progress request failed; leave the job exactly as it is
            continue

        percent = progress.get("data", {}).get("progress", 0)
        if percent != 100:
            # re-saving updates the last_updated date, which means the job won't be
            # looked at again for a while
            job.save()
            continue

        # the job has finished, so fetch what it produced
        outcome = lantern.get_results(job.job_id)
        if outcome.get("status") != "success":
            continue

        for item in outcome.get("data", []):
            LanternApi._xwalk(item).save()

        # everything has been imported, so the job is done
        job.status = "complete"
        job.save()
def test_05_low_quota(self):
    """
    Check what happens when the user has a low quota on Lantern: with two eligible
    records and QUOTA set to 1, a single job holding a single identifier is expected.
    """
    # NOTE(review): QUOTA is a module-level value, presumably read by the mocked
    # Lantern client - confirm against the test module's setup
    global QUOTA
    QUOTA = 1

    account = MonitorUKAccount()
    account.email = "*****@*****.**"
    account.lantern_email = "*****@*****.**"
    account.lantern_api_key = "123456789"
    account.save()

    # first record: needs lantern because of a missing field
    missing_field = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
    del missing_field["admin"]["lantern_lookup"]
    del missing_field["record"]["rioxxterms:publication_date"]
    PublicAPC(missing_field).save()

    # second record: needs lantern because its last lookup was a year ago and it
    # has a missing field
    timed_out = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
    timed_out["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))
    del timed_out["record"]["rioxxterms:publication_date"]
    PublicAPC(timed_out).save(blocking=True)

    LanternApi.make_new_jobs()
    time.sleep(2)

    stored_jobs = list(LanternJob().iterall())
    assert len(stored_jobs) == 1
    assert len(CREATED_JOBS) == 1

    submitted = CREATED_JOBS[0]
    assert submitted["email"] == "*****@*****.**"
    assert len(submitted["list"]) == 1
def test_17_lantern_jobs(self):
    """
    Check we can create and work with Lantern model objects: a saved job can be pulled
    back with its fields intact, and only permitted status values can be assigned.
    """
    stored = LanternJob()
    stored.job_id = "123456789"
    stored.account = "abcdefg"
    stored.status = "active"
    stored.save(blocking=True)

    fetched = LanternJob().pull(stored.id)
    expectations = (
        ("job_id", "123456789"),
        ("account", "abcdefg"),
        ("status", "active"),
    )
    for field, expected in expectations:
        assert getattr(fetched, field) == expected

    # "complete" is accepted as a status ...
    fetched.status = "complete"

    # ... but an unrecognised value is rejected
    with self.assertRaises(dataobj.DataSchemaException):
        fetched.status = "other"
def make_new_jobs(cls):
    """
    Send new requests to lantern, and record the jobs that are created.

    This method looks for accounts who have Lantern credentials, looks for PublicAPC
    records belonging to those accounts which could benefit from lookup in Lantern,
    and submits the identifiers of those records to Lantern in batches.  A LanternJob
    is recorded for every batch which Lantern accepts.

    Only as many identifiers as the account's available quota are submitted.  A record
    is stamped with a new ``lantern_lookup`` date only if its identifiers were selected
    for submission; records left out because of the quota are not stamped, so they
    remain eligible the next time this method runs.

    :return:
    """
    dao = PublicAPC()

    gen = MonitorUKAccount.list_lantern_enabled(keepalive="1h")
    for acc in gen:
        # gather the records which need a lookup and which can be identified.  They are
        # deliberately not stamped yet, because the quota may not cover all of them
        candidates = []
        for apc in dao.list_by_owner(acc.id):
            if not LanternApi._needs_lantern_data(apc):
                continue
            idents = LanternApi._get_identifiers(apc)
            if idents is not None:
                candidates.append((apc, idents))

        # if there are no identifiers, no need to do any more
        if not candidates:
            continue

        # now check the user's quota
        lc = client.Lantern(api_key=acc.lantern_api_key)
        quota = lc.get_quota(acc.lantern_email)
        available = quota.get("data", {}).get("available", 0)
        if available <= 0:
            # nothing can be submitted (a negative value would otherwise be treated
            # as a from-the-end slice bound below)
            continue

        # stamp only the records that fit within the quota
        identifiers = []
        for apc, idents in candidates[:available]:
            identifiers.append(idents)
            apc.lantern_lookup = dates.now()
            apc.save()

        for batch in LanternApi._batch(identifiers):
            resp = lc.create_job(acc.lantern_email, "monitor-uk", batch)
            if resp.get("status") == "success":
                # remember the job so that its progress can be checked later
                lj = LanternJob()
                lj.job_id = resp.get("data", {}).get("job")
                lj.account = acc.id
                lj.status = "active"
                lj.save()
def test_19_lantern_active_jobs(self):
    """
    Check we can list active Lantern jobs, both without restriction and with a
    cut-off on when they were last checked.
    """
    def build(job_id, status):
        # an unsaved job belonging to the shared test account
        job = LanternJob()
        job.job_id = job_id
        job.account = "abcdefg"
        job.status = status
        return job

    finished = build("123456789", "complete")
    finished.save()

    older_active = build("123456789", "active")
    older_active.save()

    # leave a gap so that the two active jobs have distinguishable update times
    time.sleep(2)

    newer_active = build("987654321", "active")
    newer_active.save(blocking=True)

    dao = LanternJob()

    # list all the active jobs, without worrying about cutting them off by date
    assert len(list(dao.list_active())) == 2

    # now apply a date cut-off placed one second after the older active job was
    # last updated
    cutoff = dates.parse(older_active.last_updated) + timedelta(seconds=1)
    assert len(list(dao.list_active(checked_before=cutoff))) == 1
def test_07_check_jobs(self):
    """
    Ensure that we can check existing jobs correctly: only active jobs whose last
    update is old enough are polled, unfinished jobs are touched, failed progress
    requests are ignored, and results are imported for the finished job.
    """
    account = MonitorUKAccount()
    account.email = "*****@*****.**"
    account.lantern_email = "*****@*****.**"
    account.lantern_api_key = "123456789"
    account.save()

    def new_job(job_id, status):
        # an unsaved job owned by the test account
        job = LanternJob()
        job.job_id = job_id
        job.account = account.id
        job.status = status
        return job

    # a job that is already complete
    new_job("111111111", "complete").save()

    # three active jobs, saved with a back-dated last_updated which is preserved
    stale_ids = ("222222222", "333333333", "444444444")
    stale = {}
    for job_id in stale_ids:
        job = new_job(job_id, "active")
        job.last_updated = dates.format(dates.before_now(5000))
        job.save(updated=False)
        stale[job_id] = job

    # an active job that has only just been saved
    new_job("555555555", "active").save(blocking=True)

    LanternApi.check_jobs()

    # check that the progress requests we expected were made
    assert len(PROGRESS_REQUESTS) == 3
    for job_id in stale_ids:
        assert job_id in PROGRESS_REQUESTS

    dao = LanternJob()

    # check that the job which received an error was just ignored
    errored = stale["444444444"]
    ignored = dao.pull(errored.id)
    assert ignored.last_updated == errored.last_updated
    assert ignored.status == "active"

    # check that the record which was not complete was touched
    unfinished = stale["222222222"]
    touched = dao.pull(unfinished.id)
    assert touched.last_updated != unfinished.last_updated
    assert touched.status == "active"

    # check that results were requested only for one item
    assert len(RESULTS_REQUESTS) == 1
    assert "333333333" in RESULTS_REQUESTS

    # wait for a bit, so that enhancements have time to go in
    time.sleep(2)

    # check that an enhancement was registered
    enhancements = list(Enhancement().iterall())
    assert len(enhancements) == 1

    expected = LanternFixtureFactory.xwalk_result()
    assert enhancements[0].data["record"] == expected["record"]
def test_04_create_job(self):
    """
    Check we can create jobs correctly: two jobs are expected in total, each holding
    two identifiers, and a second run must not create any further jobs.
    """
    # an account with no Lantern credentials
    plain_account = MonitorUKAccount()
    plain_account.email = "*****@*****.**"
    plain_account.save()

    first_owner = MonitorUKAccount()
    first_owner.email = "*****@*****.**"
    first_owner.lantern_email = "*****@*****.**"
    first_owner.lantern_api_key = "123456789"
    first_owner.save()

    second_owner = MonitorUKAccount()
    second_owner.email = "*****@*****.**"
    second_owner.lantern_email = "*****@*****.**"
    second_owner.lantern_api_key = "987654321"
    second_owner.save(blocking=True)

    def fresh_source(owner):
        return PublicAPCFixtureFactory.make_record(owner.id, None, None, None)

    def a_year_ago():
        return dates.format(dates.before_now(31104000))

    # both Lantern-enabled accounts get the same four kinds of record
    for owner in (first_owner, second_owner):
        # a record which does not need lantern
        source = fresh_source(owner)
        source["admin"]["lantern_lookup"] = dates.now()
        PublicAPC(source).save()

        # a record that needs lantern because of a missing field
        source = fresh_source(owner)
        del source["admin"]["lantern_lookup"]
        del source["record"]["rioxxterms:publication_date"]
        PublicAPC(source).save()

        # a record that needs lantern because it has timed out and has a missing field
        source = fresh_source(owner)
        source["admin"]["lantern_lookup"] = a_year_ago()
        del source["record"]["rioxxterms:publication_date"]
        PublicAPC(source).save()

        # a record that needs lantern but has no identifiers
        source = fresh_source(owner)
        source["admin"]["lantern_lookup"] = a_year_ago()
        del source["record"]["rioxxterms:publication_date"]
        del source["record"]["dc:identifier"]
        # only the very last save of the whole fixture set is a blocking one
        if owner is second_owner:
            PublicAPC(source).save(blocking=True)
        else:
            PublicAPC(source).save()

    LanternApi.make_new_jobs()
    time.sleep(2)

    dao = LanternJob()
    assert len(list(dao.iterall())) == 2
    assert len(CREATED_JOBS) == 2

    # each account's job contributes a distinct amount to the tally
    tally = 0
    for created in CREATED_JOBS:
        if created["email"] == "*****@*****.**":
            tally += 1
            assert len(created["list"]) == 2
        elif created["email"] == "*****@*****.**":
            tally += 10
            assert len(created["list"]) == 2
    assert tally == 11

    # now do the same thing again.  The jobs should not change, as we've already
    # created jobs for all the public records
    LanternApi.make_new_jobs()
    time.sleep(2)

    assert len(list(dao.iterall())) == 2