def restart(args, storage):
    """Put an existing job back into a processing queue.

    The job identified by ``args.job_id`` is removed from the queue named
    after its current state, reset to ``queued``/``restarted`` with an empty
    dispatcher, and pushed onto the next queue it has to pass through.  Jobs
    with a ``download`` set are sent to ``jobs:downloads`` first and keep
    ``args.queue`` as their remaining target queue; all other jobs go
    straight to ``args.queue``.

    Returns a confirmation message.

    Raises AntismashRunError if the job can't be fetched or if its state is
    not one of queued, running, done or failed.
    """
    try:
        job = Job(storage, args.job_id)
        job.fetch()
    except ValueError as err:
        message = 'Job {} not found in database, {}!'.format(args.job_id, err)
        raise AntismashRunError(message)

    restartable_states = ('queued', 'running', 'done', 'failed')
    if job.state not in restartable_states:
        raise AntismashRunError(
            'Job {job.job_id} in state {job.state} cannot be restarted'.format(job=job))

    # Remember where the job currently lives before the state is overwritten
    previous_queue = "jobs:{}".format(job.state)

    job.state = 'queued'
    job.status = 'restarted'
    job.dispatcher = ''
    job.target_queues = [args.queue]
    if job.download:
        job.needs_download = True
        # Appended last, so it is the first queue popped below
        job.target_queues.append("jobs:downloads")

    storage.lrem(previous_queue, value=job.job_id, count=-1)
    next_queue = job.target_queues.pop()
    storage.rpush(next_queue, job.job_id)
    job.commit()

    return "Restarted job {}".format(job.job_id)
def cancel(args, storage):
    """Cancel a job.

    The job identified by ``args.job_id`` is moved from the queue named after
    its current state to the queue named after ``args.state``, and its state
    and status are updated and written back to storage.

    Jobs that are not in one of the created, downloading, validating, waiting
    or queued states are only cancelled when ``args.force`` is set; otherwise
    an explanatory message is returned and nothing is changed.

    If ``args.notify`` is set, the result of ``dispatch_mail`` is appended to
    the returned message on a new line.

    Raises AntismashRunError if the job can't be fetched.
    """
    try:
        job = Job(storage, args.job_id)
        job.fetch()
    except ValueError as e:
        raise AntismashRunError('Job {} not found in database, {}!'.format(args.job_id, e)) from e

    if job.state not in ('created', 'downloading', 'validating', 'waiting', 'queued'):
        if not args.force:
            return "Cannot cancel job in state {}".format(job.state)

    old_state = job.state

    job.state = args.state
    job.status = "{}: {}".format(args.state, args.reason)

    storage.lrem('jobs:{}'.format(old_state), value=job.job_id, count=-1)
    storage.lpush('jobs:{}'.format(job.state), job.job_id)
    # Persist the new state/status; without this only the queue membership
    # changes while the stored job record keeps its old state.
    job.commit()

    ret = "Canceled job {j.job_id} ({j.state})".format(j=job)

    if args.notify:
        ret += '\n'
        ret += dispatch_mail(job)

    return ret
def test__copy_files(app, mocker):
    """_copy_files creates the new input directory and copies the sequence and GFF3 files."""
    fake_db = get_db()
    assert app.config['FAKE_DB']

    def input_path(job, name):
        # Location of an input file below the fake results directory
        return os.path.join('fake_base', job.job_id, 'input', name)

    source_job = Job(fake_db, 'bacteria-old')
    source_job.filename = 'fake.fa'
    source_job.gff3 = 'fake.gff'
    target_job = Job.fromExisting('bacteria-new', source_job)

    # Keep the test off the real file system
    makedirs_mock = mocker.patch('os.makedirs')
    copyfile_mock = mocker.patch('shutil.copyfile')

    utils._copy_files('fake_base', source_job, target_job)

    makedirs_mock.assert_called_once_with(
        os.path.join('fake_base', target_job.job_id, 'input'), exist_ok=True)

    expected_copies = [
        call(input_path(source_job, source_job.filename), input_path(target_job, target_job.filename)),
        call(input_path(source_job, source_job.gff3), input_path(target_job, target_job.gff3)),
    ]
    copyfile_mock.assert_has_calls(expected_copies)
def notify(args, storage):
    """Send an email notification about the job given in ``args.job_id``.

    Returns whatever ``dispatch_mail`` returns for the fetched job.

    Raises AntismashRunError if the job can't be fetched from storage.
    """
    try:
        job = Job(storage, args.job_id)
        job.fetch()
    except ValueError as err:
        not_found = 'Job {} not found in database, {}!'.format(args.job_id, err)
        raise AntismashRunError(not_found)

    return dispatch_mail(job)
def show(args, storage):
    """Handle smashctl job show.

    Returns a single tab-separated line with the job's ID, dispatcher, added
    and last-changed timestamps, email, state and status.

    Raises AntismashRunError if the job can't be fetched from storage.
    """
    try:
        job = Job(storage, args.job_id)
        job.fetch()
    except ValueError as err:
        not_found = 'Job {} not found in database, {}!'.format(args.job_id, err)
        raise AntismashRunError(not_found)

    line_format = "{job.job_id}\t{job.dispatcher}\t{job.added}\t{job.last_changed}\t{job.email}\t{job.state}\t{job.status}"
    return line_format.format(job=job)
def test_show_simple(db):
    """job.show prints a stored job and raises for an unknown job ID."""
    stored = Job(db, 'bacteria-fake')
    stored.commit()

    args = Namespace(job_id='bacteria-fake')
    line_format = "{job.job_id}\t{job.dispatcher}\t{job.added}\t{job.last_changed}\t{job.email}\t{job.state}\t{job.status}"
    expected = line_format.format(job=stored)
    assert job.show(args, db) == expected

    # A job ID that was never committed must be reported as an error
    args.job_id = 'bacteria-nonexisting'
    with pytest.raises(AntismashRunError):
        job.show(args, db)
def _get_oldest_job(queue):
    """Return the fetched job at the tail of ``queue``, or None if the queue is empty."""
    store = get_db()
    try:
        # Index -1 is the last list element, which is treated as the oldest job
        oldest_id = store.lrange(queue, -1, -1)[0]
    except IndexError:
        return None
    else:
        oldest = Job(store, oldest_id)
        oldest.fetch()
        return oldest
def test_run_loop_target_queues(db, mocker, tmpdir):
    """run_loop forwards a job to its remaining target queue."""
    mocker.patch("downloader.core.download_job_files")
    workdir = tmpdir.mkdir("upload")
    cfg = config.Config(name="test", workdir=str(workdir))

    queued = Job(db, "bacteria-123456")
    queued.target_queues.append("jobs:special")
    queued.commit()
    db.lpush("jobs:downloads", queued.job_id)

    core.run_loop(cfg, db)

    # The single target queue has been consumed and now holds the job
    queued.fetch()
    assert queued.target_queues == []
    assert db.rpop("jobs:special") == queued.job_id
def download_job_files(config: Config, job: Job) -> None:
    """Download the input files of an antiSMASH job.

    The job is committed in the ``downloading`` state before the download
    starts, with ``config.name`` appended to its trace.  Once
    ``download_to_file`` returns, the job is committed again as
    ``queued``/``pending`` with ``needs_download`` cleared and ``filename``
    set to ``<download>.gbk``.
    """
    accession = job.download

    job.state = 'downloading'
    job.status = "Downloading {} from NCBI".format(accession)
    job.trace.append(config.name)
    job.commit()

    # Prefix of the downloaded file inside the job's input directory
    target_prefix = os.path.join(config.workdir, job.job_id, 'input', accession)
    download_to_file(accession, NadConfig(format="genbank", recursive=True), target_prefix)

    job.filename = '{}.gbk'.format(accession)
    job.needs_download = False
    job.status = "pending"
    job.state = 'queued'
    job.commit()
def run_loop(config: Config, db: redis.Redis) -> None:
    """Run one iteration of the main loop.

    Picks one job ID, either a leftover in this downloader's private
    ``<name>:downloading`` list or a fresh one moved there from the download
    queue, downloads its files if required, and then moves the job on to the
    next queue.  Returns without doing anything when no job is available.
    """
    working_queue = "{}:downloading".format(config.name)
    source_queues = [config.download_queue]

    # First, try to pick up any left over jobs from before a crash
    job_id = db.lindex(working_queue, -1)
    if job_id is None:
        for source in source_queues:
            job_id = db.rpoplpush(source, working_queue)
            if job_id is not None:
                break

    if job_id is None:
        return

    job = Job(db, job_id).fetch()

    if job.needs_download and job.download:
        try:
            logging.info("Downloading files for %s", job.job_id)
            download_job_files(config, job)
            logging.info("Done with %s", job.job_id)
        except (DownloadError, InvalidIdError, ValidationError, ValueError) as err:
            job.state = "failed"
            job.status = "Failed to download file from NCBI"
            # Appended last, so the failed queue is the one popped below
            job.target_queues.append(config.failed_queue)

            # Count the failure under the first matching error category
            if isinstance(err, ValidationError):
                VALIDATION_ERROR.inc()
            elif isinstance(err, InvalidIdError):
                INVALID_ID.inc()
            elif isinstance(err, DownloadError):
                DOWNLOAD_ERROR.inc()
            elif isinstance(err, ValueError):
                logging.error("ValueError raised when trying to download {j.job_id}:{j.download}".format(j=job))
                DOWNLOAD_ERROR.inc()

    # fallback to the default queue, do we want this?
    next_queue = job.target_queues.pop() if job.target_queues else "jobs:queued"

    job.commit()
    db.lrem(working_queue, 1, job.job_id)
    db.lpush(next_queue, job.job_id)
def status(task_id):
    """Return the JSON status of the job ``task_id``; abort with 404 if it can't be fetched."""
    job = Job(get_db(), task_id)
    try:
        job.fetch()
    except ValueError:
        # TODO: Write a json error handler for 404 errors
        abort(404)

    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    res = job.to_dict()
    if job.state == 'done':
        res['result_url'] = "%s/%s/index.html" % (app.config['RESULTS_URL'], job.job_id)

    res.update(
        added_ts=job.added.strftime(timestamp_format),
        last_changed_ts=job.last_changed.strftime(timestamp_format),
        # TODO: This fixes old web UIs while stupid browser caching is going on. Can be removed soon, I hope.
        # I hate browser caches.
        short_status=job.state,
    )

    return jsonify(res)
def test__dark_launch_job(app, mocker):
    """_dark_launch_job only submits a copy when the random draw is below the configured percentage."""
    fake_db = get_db()
    assert app.config['FAKE_DB']
    app.config['DARK_LAUNCH_PERCENTAGE'] = 10

    def newest_job(queue):
        # Fetch the job whose ID sits at the tail of the given queue
        return Job(fake_db, fake_db.lrange(queue, -1, -1)[0]).fetch()

    # Draw above the percentage: nothing is submitted
    mocker.patch('random.randrange', return_value=15)
    dev_len_before = fake_db.llen('jobs:development')
    job = Job(fake_db, 'taxon-fake')
    job.commit()
    utils._dark_launch_job(fake_db, job, app.config)
    assert fake_db.llen('jobs:development') == dev_len_before

    # Draw below the percentage: a copy lands in the development queue
    mocker.patch('random.randrange', return_value=5)
    utils._dark_launch_job(fake_db, job, app.config)
    assert fake_db.llen('jobs:development') == dev_len_before + 1
    dark_job = newest_job('jobs:development')
    assert dark_job.original_id == job.job_id

    # trim with start > end empties the list
    fake_db.ltrim('jobs:downloads', 2, 1)
    job.needs_download = True
    job.commit()
    utils._dark_launch_job(fake_db, job, app.config)
    assert fake_db.llen('jobs:downloads') == 1
    dark_job = newest_job('jobs:downloads')
    assert dark_job.original_id == job.job_id
    assert dark_job.target_queues == ['jobs:development']
def joblist(args, storage):
    """Handle listing jobs.

    Returns one tab-separated line per job in the queue ``jobs:<args.queue>``,
    or a short message if no job could be listed.  Queue entries that raise
    ValueError while being fetched or formatted are skipped.
    """
    queue_key = 'jobs:{}'.format(args.queue)
    line_format = (
        '{job.job_id}\t{job.jobtype}\t{job.dispatcher}\t{job.email}\t{job.added}\t{job.last_changed}\t'
        '{job.filename}{job.download}\t{job.state}\t{job.status}'
    )

    lines = []
    for job_id in storage.lrange(queue_key, 0, -1):
        try:
            job = Job(storage, job_id)
            job.fetch()
            lines.append(line_format.format(job=job))
        except ValueError:
            continue

    if lines:
        return "\n".join(lines)
    return "No jobs in queue {!r}".format(args.queue)
def test_send_mail(mocker):
    """send_mail builds the right message for both finished and failed jobs."""
    mock_handle_send = mocker.patch('smashctl.mail.handle_send')
    job = Job(None, 'bacteria-fake')
    job.state = 'done'
    job.email = '*****@*****.**'
    conf = generate_mail_conf()

    mail.send_mail(conf, job)
    message = mock_handle_send.call_args[0][1]
    assert message['From'] == conf.sender
    assert message['To'] == job.email
    text = message.get_payload()
    assert 'The antiSMASH job' in text
    assert 'You can find the results' in text

    # reset_mock() clears the recorded calls, so the assertions below can only
    # pass if the second send_mail() really calls handle_send again.  Calling
    # .reset() on a mock merely creates and invokes a child mock and clears
    # nothing.
    mock_handle_send.reset_mock()
    job.state = 'failed'

    mail.send_mail(conf, job)
    message = mock_handle_send.call_args[0][1]
    assert message['From'] == conf.sender
    assert message['To'] == job.email
    text = message.get_payload()
    assert 'The antiSMASH job' in text
    assert 'Please contact' in text
def _dark_launch_job(redis_store, job, config):
    """Submit a copy of the job to the development queue so we can test new versions on real data.

    Does nothing unless ``_want_to_run`` approves the configured
    ``DARK_LAUNCH_PERCENTAGE``.  The copy gets a fresh job ID, the dark launch
    email and job type, a fixed set of analyses switched on, and copies of the
    original job's input files.
    """
    if not _want_to_run(config['DARK_LAUNCH_PERCENTAGE']):
        return

    shadow_job = Job.fromExisting(_generate_jobid(config['TAXON']), job)
    shadow_job.email = config['DARK_LAUNCH_EMAIL']
    shadow_job.jobtype = config['DARK_LAUNCH_JOBTYPE']

    always_enabled = (
        # Activate all the extra analyses so we can test those as well
        'asf', 'clusterhmmer', 'pfam2go', 'rre', 'tigrfam',
        # Activate all the *clusterblast options
        'clusterblast', 'knownclusterblast', 'subclusterblast', 'cc_mibig',
    )
    for option in always_enabled:
        setattr(shadow_job, option, True)

    rare_percentage = config['RARE_TEST_PERCENTAGE']

    # Don't always run smcog-trees
    if _want_to_run(rare_percentage):
        shadow_job.smcog_trees = True

    # Only run cassis occasionally, and only on fungal jobs
    if job.taxon == "fungi" and _want_to_run(rare_percentage):
        shadow_job.cassis = True

    _copy_files(config['RESULTS_PATH'], job, shadow_job)

    shadow_job.target_queues = [config['DEVELOPMENT_QUEUE']]
    if shadow_job.needs_download:
        shadow_job.target_queues.append(config['DOWNLOAD_QUEUE'])

    _add_to_queue(redis_store, shadow_job)
def test_api_status_pending(client):
    """Test reading the status of a job"""
    submission = client.post(url_for('api_submit'), data=dict(ncbi='FAKE'))
    job_id = submission.json['id']

    # A freshly submitted job is reported as queued
    response = client.get(url_for('status', task_id=job_id))
    assert 200 == response.status_code
    assert response.json['state'] == 'queued'

    # Once the job is done, the status carries a result URL
    finished = Job(get_db(), job_id)
    finished.fetch()
    finished.state = 'done'
    finished.commit()
    response = client.get(url_for('status', task_id=job_id))
    assert 200 == response.status_code
    assert 'result_url' in response.json

    # Unknown job IDs give a 404
    response = client.get(url_for('status', task_id='nonexistent'))
    assert 404 == response.status_code
def test__submit_job_vip(app):
    """Test VIP job submission works as expected"""
    fake_db = get_db()
    queue = app.config['PRIORITY_QUEUE']
    assert app.config['FAKE_DB']
    initial_len = fake_db.llen(queue)

    def submit(**attributes):
        # Create, configure, commit and submit a fresh job
        submitted = Job(fake_db, 'taxon-fake')
        for name, value in attributes.items():
            setattr(submitted, name, value)
        submitted.commit()
        utils._submit_job(fake_db, submitted, app.config)
        return submitted

    app.config['VIP_USERS'].add('*****@*****.**')

    # No priority queue for Bob
    submit(email='*****@*****.**')
    assert initial_len == fake_db.llen(queue)

    # Priority queue for Alice
    submit(email='*****@*****.**')
    assert initial_len + 1 == fake_db.llen(queue)

    # Priority queue when downloading
    fake_db.ltrim(app.config['DOWNLOAD_QUEUE'], 2, 1)  # clear queue
    job = submit(email='*****@*****.**', needs_download=True)
    assert 1 == fake_db.llen(app.config['DOWNLOAD_QUEUE'])
    job.fetch()
    assert job.target_queues == [queue]
def _store_upload(upload, dirname, error_message):
    """Save an uploaded file into ``dirname`` and return its sanitised name.

    Returns None when there is no upload or when its name is empty after
    sanitising, so the caller can decide whether that is an error.

    Raises BadRequest(error_message) if the file is missing after saving.
    """
    if upload is None:
        return None

    # secure_filename() may legitimately return an empty string; saving to
    # path.join(dirname, '') would then target the directory itself.
    filename = secure_filename(upload.filename or '')
    if not filename:
        return None

    target = path.join(dirname, filename)
    upload.save(target)
    if not path.exists(target):
        raise BadRequest(error_message)
    return filename


def dispatch_job():
    """Internal helper to dispatch a new job.

    Builds a Job from the current request's form fields and uploaded files,
    creates the job's input directory below ``RESULTS_PATH``, and submits the
    job (plus, possibly, a dark launch copy of it).

    Returns the submitted job.

    Raises BadRequest for an invalid job type, an NCBI ID containing spaces,
    or an input file that is missing, unnamed or can't be saved.
    """
    redis_store = get_db()
    taxon = app.config['TAXON']
    job_id = _generate_jobid(taxon)

    job = Job(redis_store, job_id)

    if 'X-Forwarded-For' in request.headers:
        # Use the last space-separated entry of the first forwarding header
        job.ip_addr = request.headers.getlist("X-Forwarded-For")[0].rpartition(' ')[-1]
    else:
        job.ip_addr = request.remote_addr or 'untrackable'

    ncbi = request.form.get('ncbi', '').strip()

    val = request.form.get('email', '').strip()
    if val:
        job.email = val

    job.minimal = _get_checkbox(request, 'minimal')

    job.all_orfs = _get_checkbox(request, 'all_orfs')

    job.smcogs = _get_checkbox(request, 'smcogs')

    job.clusterblast = _get_checkbox(request, 'clusterblast')
    job.knownclusterblast = _get_checkbox(request, 'knownclusterblast')
    job.subclusterblast = _get_checkbox(request, 'subclusterblast')
    job.cc_mibig = _get_checkbox(request, 'cc_mibig')

    job.jobtype = request.form.get('jobtype', app.config['DEFAULT_JOBTYPE'])
    if job.jobtype not in (app.config['LEGACY_JOBTYPE'], app.config['DEFAULT_JOBTYPE']):
        raise BadRequest(f"Invalid jobtype {job.jobtype}")

    genefinder = request.form.get('genefinder', '')
    if genefinder:
        job.genefinder = genefinder

    hmmdetection_strictness = request.form.get('hmmdetection_strictness', '')
    if hmmdetection_strictness:
        job.hmmdetection_strictness = hmmdetection_strictness

    val = request.form.get('from', 0, type=int)
    if val:
        job.from_pos = val

    val = request.form.get('to', 0, type=int)
    if val:
        job.to_pos = val

    job.asf = _get_checkbox(request, 'asf')
    job.tta = _get_checkbox(request, 'tta')
    job.cassis = _get_checkbox(request, 'cassis')
    job.clusterhmmer = _get_checkbox(request, 'clusterhmmer')
    job.pfam2go = _get_checkbox(request, 'pfam2go')
    job.rre = _get_checkbox(request, 'rre')
    job.tigrfam = _get_checkbox(request, 'tigrfam')

    dirname = path.join(app.config['RESULTS_PATH'], job.job_id, 'input')
    os.makedirs(dirname)

    if ncbi != '':
        if ' ' in ncbi:
            raise BadRequest("Spaces are not allowed in an NCBI ID.")
        job.download = ncbi
        job.needs_download = True
    else:
        filename = _store_upload(request.files['seq'], dirname, "Could not save file!")
        if filename is None:
            # No upload, or an upload without a usable file name
            raise BadRequest("Uploading input file failed!")
        job.filename = filename
        job.needs_download = False

        # NOTE(review): nesting reconstructed from whitespace-collapsed source;
        # confirm the GFF3 upload is only meant to be handled for uploaded sequences.
        if 'gff3' in request.files:
            gff_filename = _store_upload(request.files['gff3'], dirname, "Could not save GFF file!")
            if gff_filename is not None:
                job.gff3 = gff_filename

    if 'sideload' in request.files:
        sideload_filename = _store_upload(request.files['sideload'], dirname,
                                          "Could not save sideload info file!")
        if sideload_filename is not None:
            job.sideload = sideload_filename

    job.trace.append("{}-api".format(platform.node()))

    _submit_job(redis_store, job, app.config)
    _dark_launch_job(redis_store, job, app.config)
    return job
def test__submit_job_legacy(app):
    """Test legacy job submission works as expected"""
    fake_db = get_db()
    queue = app.config['LEGACY_QUEUE']
    legacy_jobtype = app.config['LEGACY_JOBTYPE']
    assert app.config['FAKE_DB']
    initial_len = fake_db.llen(queue)

    def submit(**attributes):
        # Create, configure, commit and submit a fresh job
        submitted = Job(fake_db, 'taxon-fake')
        for name, value in attributes.items():
            setattr(submitted, name, value)
        submitted.commit()
        utils._submit_job(fake_db, submitted, app.config)
        return submitted

    # Legacy jobs go straight to the legacy queue
    submit(jobtype=legacy_jobtype)
    assert initial_len + 1 == fake_db.llen(queue)

    # With a pending download, the legacy queue becomes the target queue
    fake_db.ltrim(app.config['DOWNLOAD_QUEUE'], 2, 1)  # clear queue
    job = submit(jobtype=legacy_jobtype, needs_download=True)
    assert 1 == fake_db.llen(app.config['DOWNLOAD_QUEUE'])
    job.fetch()
    assert job.target_queues == [queue]
def test__submit_job_email_waitlist(app):
    """Test job submission waitlisting by email works as expected"""
    fake_db = get_db()
    email = "*****@*****.**"
    queue = "{}:{}".format(app.config['WAITLIST_PREFIX'], email)
    assert app.config['FAKE_DB']
    # A negative limit puts every submission on the waitlist
    app.config['MAX_JOBS_PER_USER'] = -1

    def submit(**attributes):
        # Create, configure, commit and submit a fresh job
        submitted = Job(fake_db, 'taxon-fake')
        for name, value in attributes.items():
            setattr(submitted, name, value)
        submitted.commit()
        utils._submit_job(fake_db, submitted, app.config)
        return submitted

    submit(email=email)
    assert 1 == fake_db.llen(queue)

    fake_db.ltrim(app.config['DOWNLOAD_QUEUE'], 2, 1)  # clear queue
    job = submit(email=email, needs_download=True)
    assert 1 == fake_db.llen(app.config['DOWNLOAD_QUEUE'])
    job.fetch()
    assert job.target_queues == [app.config['DEFAULT_QUEUE'], queue]
def test__submit_job_ip_waitlist(app):
    """Test job submission waitlisting by IP works as expected"""
    fake_db = get_db()
    ip = "192.168.0.1"
    queue = "{}:{}".format(app.config['WAITLIST_PREFIX'], ip)
    assert app.config['FAKE_DB']
    # A negative limit puts every submission on the waitlist
    app.config['MAX_JOBS_PER_USER'] = -1

    def submit(**attributes):
        # Create, configure, commit and submit a fresh job
        submitted = Job(fake_db, 'taxon-fake')
        for name, value in attributes.items():
            setattr(submitted, name, value)
        submitted.commit()
        utils._submit_job(fake_db, submitted, app.config)
        return submitted

    submit(ip_addr=ip)
    assert 1 == fake_db.llen(queue)

    fake_db.ltrim(app.config['DOWNLOAD_QUEUE'], 2, 1)  # clear queue
    job = submit(ip_addr=ip, needs_download=True)
    assert 1 == fake_db.llen(app.config['DOWNLOAD_QUEUE'])
    job.fetch()
    assert job.target_queues == [app.config['DEFAULT_QUEUE'], queue]
def test_server_status(self):
    """Test if server status returns the correct values"""
    stats_url = '/api/v1.0/stats'
    short_format = "%Y-%m-%d %H:%M"
    machine_format = "%Y-%m-%dT%H:%M:%SZ"

    def expected(**overrides):
        # Stats of a working server with all queues empty, plus overrides
        stats = dict(
            status='working',
            queue_length=0,
            running=0,
            fast=0,
            total_jobs=89132,
            ts_queued=None,
            ts_queued_m=None,
            ts_fast=None,
            ts_fast_m=None,
            legacy=0,
            ts_legacy=None,
            ts_legacy_m=None,
        )
        stats.update(overrides)
        return stats

    rv = self.client.get(stats_url)
    self.assertEqual(rv.json, expected(status='idle'))

    # fake a fast job
    redis_store = self._ctx.g._database
    fake_id = 'taxon-fake'
    j = Job(redis_store, fake_id)
    j.commit()
    redis_store.lpush('jobs:minimal', j.job_id)
    rv = self.client.get(stats_url)
    self.assertEqual(rv.json, expected(
        fast=1,
        ts_fast=j.added.strftime(short_format),
        ts_fast_m=j.added.strftime(machine_format),
    ))

    # fake a normal job
    redis_store.lpop('jobs:minimal')
    redis_store.lpush('jobs:queued', j.job_id)
    rv = self.client.get(stats_url)
    self.assertEqual(rv.json, expected(
        queue_length=1,
        ts_queued=j.added.strftime(short_format),
        ts_queued_m=j.added.strftime(machine_format),
    ))

    # fake a running job
    j.state = "running"
    j.status = "running: not really"
    j.commit()
    redis_store.rpoplpush('jobs:queued', 'jobs:running')
    rv = self.client.get(stats_url)
    self.assertEqual(rv.json, expected(running=1))
def test__submit_job_minimal(app):
    """Test fast mode job submission works as expected"""
    fake_db = get_db()
    queue = app.config['FAST_QUEUE']
    assert app.config['FAKE_DB']
    initial_len = fake_db.llen(queue)

    def submit(**attributes):
        # Create, configure, commit and submit a fresh job
        submitted = Job(fake_db, 'taxon-fake')
        for name, value in attributes.items():
            setattr(submitted, name, value)
        submitted.commit()
        utils._submit_job(fake_db, submitted, app.config)
        return submitted

    # Minimal jobs go straight to the fast queue
    submit(minimal=True)
    assert initial_len + 1 == fake_db.llen(queue)

    # With a pending download, the fast queue becomes the target queue
    fake_db.ltrim(app.config['DOWNLOAD_QUEUE'], 2, 1)  # clear queue
    job = submit(minimal=True, needs_download=True)
    assert 1 == fake_db.llen(app.config['DOWNLOAD_QUEUE'])
    job.fetch()
    assert job.target_queues == [queue]
def test_restart(db):
    """job.restart rejects freshly created and unknown jobs and requeues a running one."""
    j = Job(db, 'bacteria-1')
    j.download = 'foo'
    j.filename = 'foo.gbk'
    j.commit()
    db.lpush('jobs:running', j.job_id)

    restart_args = Namespace(job_id=j.job_id, queue="jobs:queued")

    # Jobs in 'created' state can't be restarted
    with pytest.raises(AntismashRunError):
        job.restart(restart_args, db)

    j.state = 'running'
    j.commit()

    expected_message = "Restarted job {}".format(j.job_id)
    assert job.restart(restart_args, db) == expected_message
    assert db.llen('jobs:running') == 0
    assert db.llen('jobs:queued') == 1
    # Rotating a one-element list onto itself reads the entry without removing it
    assert db.rpoplpush('jobs:queued', 'jobs:queued') == j.job_id

    j.fetch()
    assert j.state == 'queued'
    assert j.status == 'restarted'
    print(j.to_dict())
    assert not j.filename

    # Unknown job IDs are reported as an error
    missing_args = Namespace(job_id='bacteria-fake')
    with pytest.raises(AntismashRunError):
        job.restart(missing_args, db)