def deletegalaxyworkflow(workflow_galaxyid):
    """Best-effort removal of a workflow from the Galaxy server.

    Any failure (connection problem, unknown id, ...) is logged and
    swallowed; the function always returns None.
    """
    try:
        connection = galaxy_connection()
        connection.nocache = True
        connection.workflows.delete_workflow(workflow_galaxyid)
    except Exception as err:
        logging.warning("Problem while deleting workflow: %s" % (err))
def initializeworkspacejob(historyid):
    """Snapshot a Galaxy history into the matching WorkspaceHistory row.

    Fetches both the history contents and its metadata from Galaxy and
    stores them as JSON on the WorkspaceHistory whose history field is
    *historyid*.
    """
    conn = galaxy_connection()
    conn.nocache = True
    contents = conn.histories.show_history(historyid, contents=True)
    info = conn.histories.show_history(historyid)
    workspace = WorkspaceHistory.objects.get(history=historyid)
    workspace.history_content_json = json.dumps(contents)
    workspace.history_info_json = json.dumps(info)
    workspace.save()
def deletegalaxyhistory(historyid):
    """
    Celery task that will delete an history on the galaxy server in background
    """
    logging.info("Deleting history %s" % (historyid))
    try:
        conn = galaxy_connection()
        conn.nocache = True
        # purge=True removes the datasets from disk, not just the record
        conn.histories.delete_history(historyid, purge=True)
    except Exception as err:
        logging.warning("Problem while deleting history: %s" % (err))
def deleteoldgalaxyworkflows():
    """Purge non-'base' workflows older than one day.

    Workflows that are more than one day old, not deleted yet, and not
    attached to any WorkspaceHistory are removed from the Galaxy server
    and from the local database. Errors are logged and swallowed.
    """
    logger.info("Start old workflow deletion task")
    datecutoff = datetime.now() - timedelta(days=1)
    try:
        for w in Workflow.objects.exclude(category='base').filter(
                date__lte=datecutoff).filter(deleted=False):
            # Only purge workflows with no associated workspace
            if WorkspaceHistory.objects.filter(workflow=w.id).count() == 0:
                galaxycon = galaxy_connection()
                galaxycon.nocache = True
                galaxycon.workflows.delete_workflow(w.id_galaxy)
                w.delete()
    except Exception as e:
        # BUGFIX: message previously said "history" although this task
        # deletes workflows.
        logging.warning("Problem while deleting workflow: %s" % (e))
    logger.info("Old workflow deletion task finished")
def deleteoldgalaxyhistory():
    """Mark finished workspaces older than 14 days as deleted.

    For each such workspace: its workflow (if any) is removed from Galaxy
    and flagged deleted, the workspace itself is flagged deleted, and its
    Galaxy history is purged.
    """
    logger.info("Start old workspace deletion task")
    galaxycon = galaxy_connection()
    galaxycon.nocache = True
    datecutoff = datetime.now() - timedelta(days=14)
    expired = WorkspaceHistory.objects.filter(deleted=False).filter(
        finished=True).filter(created_date__lte=datecutoff)
    for workspace in expired:
        wf = workspace.workflow
        if wf is not None:
            deletegalaxyworkflow(wf.id_galaxy)
            wf.deleted = True
            wf.save()
        workspace.deleted = True
        workspace.save()
        deletegalaxyhistory(workspace.history)
    logger.info("Old workspace deletion task finished")
def deleteoldgalaxyworkflows():
    """Purge non-'base' workflows older than one day.

    Workflows that are more than one day old, not deleted yet, and not
    attached to any WorkspaceHistory are removed from the Galaxy server
    and from the local database. Errors are logged and swallowed.
    """
    logger.info("Start old workflow deletion task")
    datecutoff = datetime.now() - timedelta(days=1)
    try:
        for w in Workflow.objects.exclude(category='base').filter(
                date__lte=datecutoff).filter(deleted=False):
            # Only purge workflows with no associated workspace
            if WorkspaceHistory.objects.filter(workflow=w.id).count() == 0:
                galaxycon = galaxy_connection()
                galaxycon.nocache = True
                galaxycon.workflows.delete_workflow(w.id_galaxy)
                w.delete()
    except Exception as e:
        # BUGFIX: message previously said "history" although this task
        # deletes workflows.
        logging.warning("Problem while deleting workflow: %s" % (e))
    logger.info("Old workflow deletion task finished")
def updateworkspacestatus(historyid):
    """Refresh the stored state of one Galaxy history.

    Fetches the history content and metadata from Galaxy, stores the JSON
    snapshots on the matching WorkspaceHistory, detects completion/error
    by inspecting dataset states, and emails the user when the run has
    just finished (if a valid address was recorded).
    """
    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running
    lock_id = "lock_ngphylo_workspacemonitoring_" + historyid
    # We lock this history for 9 seconds, to avoid too frequent refreshs
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE_SHORT)
    # NOTE(review): release_lock is defined but never called; the lock
    # apparently expires on its own (throttling) — confirm intended.
    release_lock = lambda: cache.delete(lock_id)
    if acquire_lock():
        pass
    else:
        # Another refresh for this history ran recently: skip this one.
        return
    galaxycon = galaxy_connection()
    galaxycon.nocache = True
    #print "Monitoring workspace " + historyid
    finished = False
    error = False
    email = None
    try:
        hc = galaxycon.histories.show_history(historyid, contents=True)
        hi = galaxycon.histories.show_history(historyid)
        w = WorkspaceHistory.objects.get(history=historyid)
        if w.monitored and not w.finished and not w.deleted:
            w.history_content_json = json.dumps(hc)
            w.history_info_json = json.dumps(hi)
            w.save()
            # A single dataset presumably means only the input upload is
            # present, i.e. no job has produced output yet — TODO confirm.
            if len(hc) > 1:
                finished = True
                for file in hc:
                    # Any dataset still running/queued/new => not finished
                    if ('running' in file.get('state', '')
                            or 'queued' in file.get('state', '')
                            or 'new' in file.get('state', '')):
                        finished = False
                    # Any dataset in error ends the run immediately
                    if 'error' in file.get('state', ''):
                        error = True
                        finished = True
                        break
            if finished:
                w.finished = finished
                logging.warning("history %s finished? %r" %
                                (historyid, w.finished))
                # Minimal sanity check of the recorded e-mail address
                if w and w.email and re.match(r"[^@]+@[^@]+\.[^@]+", w.email):
                    logging.warning("Sending EMail to %s", w.email)
                    try:
                        citation = "Lemoine F, Correia D, Lefort V, Doppelt-Azeroual O, Mareuil F, Cohen-Boulakia S, Gascuel O\n" \
                                   "NGPhylogeny.fr: new generation phylogenetic services for non-specialists.\n" \
                                   "Nucleic Acids Research 2019 (https://doi.org/10.1093/nar/gkz303).\n"
                        message = "Dear NGPhylogeny user, \n\n"
                        if error:
                            message = message + "Your NGPhylogeny job finished with errors.\n\n"
                        else:
                            message = message + "Your NGPhylogeny job finished successfuly.\n"
                        please = 'Please visit http://%s%s to check results\n\n' % (
                            "ngphylogeny.fr",
                            reverse('history_detail',
                                    kwargs={'history_id': historyid}))
                        message = message + please
                        message = message + "Thank you for using ngphylogeny.fr\n\n"
                        message = message + "NGPhylogeny.fr development team.\n\n"
                        message = message + citation
                        send_mail(
                            'NGPhylogeny.fr results',
                            message,
                            '*****@*****.**',
                            [w.email],
                            fail_silently=False,
                        )
                        #print(message)
                    except SMTPException as e:
                        logging.warning("Problem with smtp server : %s" % (e))
                    except Exception as e:
                        logging.warning(
                            "Unknown Problem while sending e-mail: %s" % (e))
            # Persist the (possibly updated) finished flag
            w.save()
    except:
        # Best effort: the task will run again later, so only log here.
        logging.warning('Problem with Galaxy server, will retry later')
def checkblastruns():
    """
    Every minutes, check running pasteur blast runs

    For each pending/running Pasteur BlastRun: read the state of its
    output dataset on Galaxy; on success, download the BLAST XML, filter
    HSPs by e-value and coverage into a PseudoMSA, store the kept subject
    sequences, build an NJ tree, and mark the run finished (or in error
    when no hit passes the filters). Users with a valid e-mail address
    are notified once the run reaches a terminal state.
    """
    logger.info("Start pasteur blast task check")
    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running
    lock_id = "lock_ngphylo_blastmonitoring"
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    release_lock = lambda: cache.delete(lock_id)
    if acquire_lock():
        pass
    else:
        # Previous check still running: skip this round.
        return
    try:
        galaxycon = galaxy_connection()
        galaxycon.nocache = True
        for b in BlastRun.objects.filter(
                deleted=False, server=BlastRun.PASTEUR).filter(
                    Q(status=BlastRun.PENDING) | Q(status=BlastRun.RUNNING)):
            # State of the output file we want (blast XML)
            dataset = galaxycon.histories.show_dataset(b.history,
                                                       b.history_fileid)
            state = dataset.get('state')
            infos = dataset.get('misc_info')
            b.message = infos
            if state == 'ok':
                b.status = BlastRun.FINISHED
                blast_type = BlastRun.blast_type(BlastRun.PASTEUR, b.blastprog)
                ## Download the result file from galaxy first...
                tmp_file = tempfile.NamedTemporaryFile()
                galaxycon.datasets.download_dataset(b.history_fileid,
                                                    tmp_file.name, False)
                query_seq_bk = b.query_seq
                frame = 1
                # Translated searches: re-translate the query in the frame
                # most hits agree on before aligning HSPs against it.
                if blast_type == 'blastx' or blast_type == 'tblastx':
                    frame = majorityQueryFrame(tmp_file.name)
                    b.query_seq = biofile.translate(str(b.query_seq), frame)
                    b.save()
                result_handle = open(tmp_file.name, "r")
                blast_records = NCBIXML.parse(result_handle)
                ms = PseudoMSA(b.query_id, b.query_seq, query_seq_bk, frame,
                               blast_type)
                for blast_record in blast_records:
                    for alignment in blast_record.alignments:
                        for hsp in alignment.hsps:
                            e_val = hsp.expect
                            # Coverage = alignment length / query length
                            leng = float(hsp.align_length) / float(
                                len(b.query_seq))
                            if e_val < b.evalue and leng >= b.coverage:
                                ms.add_hsp(
                                    biofile.newick_clean(alignment.title),
                                    hsp)
                if blast_type == 'blastx' or blast_type == 'tblastx':
                    ms.crop_alignment(b.maxseqs)
                    b.query_seq = "".join(ms.query_seq)
                    b.save()
                nseq = 0
                # Keep at most maxseqs best-scoring subjects
                for id, seq, fullseq in ms.first_n_max_score_sequences(
                        b.maxseqs):
                    s = BlastSubject(subject_id=id,
                                     subject_seq=seq,
                                     subject_fullseq=fullseq,
                                     blastrun=b)
                    s.save()
                    nseq += 1
                if nseq > 0:
                    b.tree = b.build_nj_tree()
                    b.status = BlastRun.FINISHED
                    b.save()
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Blast Search returned no results"
                    b.save()
            elif state == 'queued' or state == 'new':
                b.status = BlastRun.PENDING
            elif state == 'running':
                b.status = BlastRun.RUNNING
            else:
                b.status = BlastRun.ERROR
            b.save()
            # Notify the user once the run reached a terminal state
            if b.email is not None and re.match(
                    r"[^@]+@[^@]+\.[^@]+",
                    b.email) and (b.status == BlastRun.ERROR
                                  or b.status == BlastRun.FINISHED):
                try:
                    message = "Dear NGPhylogeny user, \n\n"
                    if b.status != b.FINISHED:
                        message = message + "Your NGPhylogeny BLAST job finished with errors.\n\n"
                    else:
                        message = message + "Your NGPhylogeny BLAST job finished successfuly.\n"
                    please = 'Please visit http://%s%s to check results\n\n' % (
                        "ngphylogeny.fr",
                        reverse('blast_view', kwargs={'pk': b.id}))
                    message = message + please
                    message = message + "Thank you for using ngphylogeny.fr\n\n"
                    message = message + "NGPhylogeny.fr development team.\n"
                    send_mail(
                        'NGPhylogeny.fr BLAST results',
                        message,
                        '*****@*****.**',
                        [b.email],
                        fail_silently=False,
                    )
                except SMTPException as e:
                    logging.warning("Problem with smtp server : %s" % (e))
                except Exception as e:
                    logging.warning(
                        "Unknown Problem while sending e-mail: %s" % (e))
    except Exception as e:
        # NOTE(review): `b` is unbound here if the failure happened before
        # the loop body ran — this would then raise NameError. Confirm.
        b.status = BlastRun.ERROR
        b.message = str(e)
        b.save()
        logger.info("Error while checking blast run: %s" % (e))
        logging.exception("message")
    release_lock()
    logger.info("Pasteur blast runs checked")
def launch_pasteur_blast(blastrunid, sequence, prog, db, evalue, coverage,
                         maxseqs):
    """
    Celery task that will launch a blast on the pasteur Galaxy Server

    Parses *sequence* as FASTA (exactly one record expected), records the
    run parameters on the BlastRun *blastrunid*, checks the sequence
    alphabet against the program's expected input type, uploads the query
    to a fresh Galaxy history and starts the BLAST tool. Any failure is
    recorded on the BlastRun (status ERROR + message).
    """
    logging.info("Blasting %s with %s on %s" % (sequence, prog, db))
    b = BlastRun.objects.get(id=blastrunid)
    try:
        fasta_io = StringIO(sequence)
        records = list(SeqIO.parse(fasta_io, "fasta"))
        if len(records) == 1:
            galaxycon = galaxy_connection()
            galaxycon.nocache = True
            history = galaxycon.histories.create_history(name="BlastXplorer")
            b.history = history.get("id")
            b.query_id = biofile.cleanseqname(records[0].id)
            b.query_seq = records[0].seq
            b.evalue = evalue
            b.coverage = coverage
            b.database = db
            b.blastprog = prog
            b.maxseqs = maxseqs
            b.status = BlastRun.PENDING
            b.save()
            blast_type = BlastRun.blast_type(BlastRun.PASTEUR, prog)
            blast_inputtype = BlastRun.blast_inputtype(BlastRun.PASTEUR, prog)
            # We check alphabet of given sequence
            if ((blast_inputtype == "nt"
                 and not biofile.check_nt(b.query_seq))
                    or (blast_inputtype == "aa"
                        and not biofile.check_aa(b.query_seq))):
                b.status = BlastRun.ERROR
                b.message = "The given sequence has the wrong alphabet. Program %s expects %s sequence" % (
                    blast_type, blast_inputtype)
            elif blast_type is not None:
                # BUGFIX: NamedTemporaryFile defaults to binary mode
                # ('w+b'); writing the str `sequence` would raise
                # TypeError on Python 3. Open in text mode instead.
                tmp_file = tempfile.NamedTemporaryFile(mode="w")
                tmp_file.write(sequence)
                tmp_file.flush()
                if biofile.is_fasta_one_seq(tmp_file.name):
                    ## Upload input query file to galaxy
                    outputs = galaxycon.tools.upload_file(
                        path=tmp_file.name,
                        file_name="blastinput.fasta",
                        history_id=history.get("id"),
                        file_type="fasta")
                    file_id = outputs.get('outputs')[0].get('id')
                    ## Configuring job
                    tool_inputs = inputs()
                    tool_inputs.set_dataset_param("query", file_id)
                    tool_inputs.set_param("db_opts|database", db)
                    tool_inputs.set_param("blast_type", blast_type)
                    tool_inputs.set_param("evalue_cutoff", evalue)
                    # Output format 5 = BLAST XML
                    tool_inputs.set_param("output|out_format", "5")
                    ## Running blast job
                    outputs = galaxycon.tools.run_tool(
                        history_id=history.get("id"),
                        tool_id=prog,
                        tool_inputs=tool_inputs)
                    b.history_fileid = outputs.get("outputs")[0].get("id")
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Bad input FASTA file format"
            else:
                b.status = BlastRun.ERROR
                b.message = "Wrong blast program %s" % (prog)
            b.save()
        else:
            b.status = BlastRun.ERROR
            # records is already a list; no need to re-wrap in list()
            b.message = "More than one record in the fasta file! %d" % (
                len(records))
    except Exception as e:
        logging.exception(str(e))
        b.status = BlastRun.ERROR
        b.message = str(e)
    b.save()
    time.sleep(30)
def monitorworkspace(historyid):
    """
    Celery task that will monitor galaxy workspace
    It will update content of the workspace django model every 10 seconds
    It will wait for end of execution of all jobs and send a mail at the
    end, if the mail has been given by the user.
    this task is launched by workspace.views.HistoryDetailView only once,
    when the WorkspaceHistory is marked as "notmonitored"
    """
    galaxycon = galaxy_connection()
    galaxycon.nocache = True
    #print "Monitoring workspace " + historyid
    finished = False
    error = False
    email = None
    # Poll Galaxy until every dataset of the history leaves the
    # running/queued/new states (or one of them errors out).
    while not finished:
        try:
            hc = galaxycon.histories.show_history(historyid, contents=True)
            hi = galaxycon.histories.show_history(historyid)
            w = WorkspaceHistory.objects.get(history=historyid)
            w.history_content_json = json.dumps(hc)
            w.history_info_json = json.dumps(hi)
            w.save()
            finished = True
            for file in hc:
                if ('running' in file.get('state', '')
                        or 'queued' in file.get('state', '')
                        or 'new' in file.get('state', '')):
                    finished = False
                # An errored dataset terminates the monitoring loop
                if 'error' in file.get('state', ''):
                    error = True
                    finished = True
        except:
            # Transient Galaxy failure: back off for a minute, keep looping.
            # NOTE(review): if the very first iteration fails, `w` below is
            # unbound once the loop exits — confirm this cannot happen.
            logging.warning('Problem with Galaxy server, waiting 1 minute')
            time.sleep(60)
        if not finished:
            time.sleep(10)
    w.finished = True
    w.save()
    logging.warning("history finished? %r" % (w.finished))
    # Minimal sanity check of the recorded e-mail address
    if w and w.email and re.match(r"[^@]+@[^@]+\.[^@]+", w.email):
        try:
            message = "Dear NGPhylogeny user, \n\n"
            if error:
                message = message + "Your NGPhylogeny job finished with errors.\n\n"
            else:
                message = message + "Your NGPhylogeny job finished successfuly.\n"
            please = 'Please visit http://%s%s to check results\n\n' % (
                "ngphylogeny.fr",
                reverse('history_detail', kwargs={'history_id': historyid}))
            message = message + please
            message = message + "Thank you for using ngphylogeny.fr\n\n"
            message = message + "NGPhylogeny.fr development team.\n"
            send_mail(
                'NGPhylogeny.fr results',
                message,
                '*****@*****.**',
                [w.email],
                fail_silently=False,
            )
            print(message)
        except SMTPException as e:
            logging.warning("Problem with smtp server : %s" % (e))
        except Exception as e:
            logging.warning("Unknown Problem while sending e-mail: %s" % (e))
def checkblastruns():
    """
    Every minutes, check running pasteur blast runs

    For each pending/running Pasteur BlastRun: read the state of its
    output dataset on Galaxy; on success, download the BLAST XML, filter
    HSPs by e-value and coverage into a PseudoMSA, store the kept subject
    sequences, build an NJ tree, and mark the run finished (or in error
    when no hit passes the filters). Users with a valid e-mail address
    are notified once the run reaches a terminal state.
    """
    logger.info("Start pasteur blast task check")
    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running
    lock_id = "lock_ngphylo_blastmonitoring"
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    release_lock = lambda: cache.delete(lock_id)
    if acquire_lock():
        pass
    else:
        # Previous check still running: skip this round.
        return
    try:
        galaxycon = galaxy_connection()
        galaxycon.nocache = True
        for b in BlastRun.objects.filter(
                deleted=False, server=BlastRun.PASTEUR).filter(
                    Q(status=BlastRun.PENDING) | Q(status=BlastRun.RUNNING)):
            # State of the output file we want (blast XML)
            dataset = galaxycon.histories.show_dataset(b.history,
                                                       b.history_fileid)
            state = dataset.get('state')
            infos = dataset.get('misc_info')
            b.message = infos
            if state == 'ok':
                b.status = BlastRun.FINISHED
                blast_type = BlastRun.blast_type(BlastRun.PASTEUR, b.blastprog)
                ## Download the result file from galaxy first...
                tmp_file = tempfile.NamedTemporaryFile()
                galaxycon.datasets.download_dataset(b.history_fileid,
                                                    tmp_file.name, False)
                query_seq_bk = b.query_seq
                frame = 1
                # Translated searches: re-translate the query in the frame
                # most hits agree on before aligning HSPs against it.
                if blast_type == 'blastx' or blast_type == 'tblastx':
                    frame = majorityQueryFrame(tmp_file.name)
                    b.query_seq = biofile.translate(str(b.query_seq), frame)
                    b.save()
                result_handle = open(tmp_file.name, "r")
                blast_records = NCBIXML.parse(result_handle)
                ms = PseudoMSA(b.query_id, b.query_seq, query_seq_bk, frame,
                               blast_type)
                for blast_record in blast_records:
                    for alignment in blast_record.alignments:
                        for hsp in alignment.hsps:
                            e_val = hsp.expect
                            # Coverage = alignment length / query length
                            leng = float(hsp.align_length) / float(
                                len(b.query_seq))
                            if e_val < b.evalue and leng >= b.coverage:
                                ms.add_hsp(
                                    biofile.newick_clean(alignment.title),
                                    hsp)
                if blast_type == 'blastx' or blast_type == 'tblastx':
                    ms.crop_alignment(b.maxseqs)
                    b.query_seq = "".join(ms.query_seq)
                    b.save()
                nseq = 0
                # Keep at most maxseqs best-scoring subjects
                for id, seq, fullseq in ms.first_n_max_score_sequences(
                        b.maxseqs):
                    s = BlastSubject(subject_id=id,
                                     subject_seq=seq,
                                     subject_fullseq=fullseq,
                                     blastrun=b)
                    s.save()
                    nseq += 1
                if nseq > 0:
                    b.tree = b.build_nj_tree()
                    b.status = BlastRun.FINISHED
                    b.save()
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Blast Search returned no results"
                    b.save()
            elif state == 'queued' or state == 'new':
                b.status = BlastRun.PENDING
            elif state == 'running':
                b.status = BlastRun.RUNNING
            else:
                b.status = BlastRun.ERROR
            b.save()
            # Notify the user once the run reached a terminal state
            if b.email is not None and re.match(
                    r"[^@]+@[^@]+\.[^@]+",
                    b.email) and (b.status == BlastRun.ERROR
                                  or b.status == BlastRun.FINISHED):
                try:
                    message = "Dear NGPhylogeny user, \n\n"
                    if b.status != b.FINISHED:
                        message = message + "Your NGPhylogeny BLAST job finished with errors.\n\n"
                    else:
                        message = message + "Your NGPhylogeny BLAST job finished successfuly.\n"
                    please = 'Please visit http://%s%s to check results\n\n' % (
                        "ngphylogeny.fr",
                        reverse('blast_view', kwargs={'pk': b.id}))
                    message = message + please
                    message = message + "Thank you for using ngphylogeny.fr\n\n"
                    message = message + "NGPhylogeny.fr development team.\n"
                    send_mail(
                        'NGPhylogeny.fr BLAST results',
                        message,
                        '*****@*****.**',
                        [b.email],
                        fail_silently=False,
                    )
                except SMTPException as e:
                    logging.warning("Problem with smtp server : %s" % (e))
                except Exception as e:
                    logging.warning(
                        "Unknown Problem while sending e-mail: %s" % (e))
    except Exception as e:
        # NOTE(review): `b` is unbound here if the failure happened before
        # the loop body ran — this would then raise NameError. Confirm.
        b.status = BlastRun.ERROR
        b.message = str(e)
        b.save()
        logger.info("Error while checking blast run: %s" % (e))
        logging.exception("message")
    release_lock()
    logger.info("Pasteur blast runs checked")
def launch_pasteur_blast(blastrunid, sequence, prog, db, evalue, coverage,
                         maxseqs):
    """
    Celery task that will launch a blast on the pasteur Galaxy Server

    Parses *sequence* as FASTA (exactly one record expected), records the
    run parameters on the BlastRun *blastrunid*, checks the sequence
    alphabet against the program's expected input type, uploads the query
    to a fresh Galaxy history and starts the BLAST tool. Any failure is
    recorded on the BlastRun (status ERROR + message).
    """
    logging.info("Blasting %s with %s on %s" % (sequence, prog, db))
    b = BlastRun.objects.get(id=blastrunid)
    try:
        fasta_io = StringIO(sequence)
        records = list(SeqIO.parse(fasta_io, "fasta"))
        if len(records) == 1:
            galaxycon = galaxy_connection()
            galaxycon.nocache = True
            history = galaxycon.histories.create_history(name="BlastXplorer")
            b.history = history.get("id")
            b.query_id = biofile.cleanseqname(records[0].id)
            b.query_seq = records[0].seq
            b.evalue = evalue
            b.coverage = coverage
            b.database = db
            b.blastprog = prog
            b.maxseqs = maxseqs
            b.status = BlastRun.PENDING
            b.save()
            blast_type = BlastRun.blast_type(BlastRun.PASTEUR, prog)
            blast_inputtype = BlastRun.blast_inputtype(BlastRun.PASTEUR, prog)
            # We check alphabet of given sequence
            if ((blast_inputtype == "nt"
                 and not biofile.check_nt(b.query_seq))
                    or (blast_inputtype == "aa"
                        and not biofile.check_aa(b.query_seq))):
                b.status = BlastRun.ERROR
                b.message = "The given sequence has the wrong alphabet. Program %s expects %s sequence" % (
                    blast_type, blast_inputtype)
            elif blast_type is not None:
                # BUGFIX: NamedTemporaryFile defaults to binary mode
                # ('w+b'); writing the str `sequence` would raise
                # TypeError on Python 3. Open in text mode instead.
                tmp_file = tempfile.NamedTemporaryFile(mode="w")
                tmp_file.write(sequence)
                tmp_file.flush()
                if biofile.is_fasta_one_seq(tmp_file.name):
                    ## Upload input query file to galaxy
                    outputs = galaxycon.tools.upload_file(
                        path=tmp_file.name,
                        file_name="blastinput.fasta",
                        history_id=history.get("id"),
                        file_type="fasta")
                    file_id = outputs.get('outputs')[0].get('id')
                    ## Configuring job
                    tool_inputs = inputs()
                    tool_inputs.set_dataset_param("query", file_id)
                    tool_inputs.set_param("db_opts|database", db)
                    tool_inputs.set_param("blast_type", blast_type)
                    tool_inputs.set_param("evalue_cutoff", evalue)
                    # Output format 5 = BLAST XML
                    tool_inputs.set_param("output|out_format", "5")
                    ## Running blast job
                    outputs = galaxycon.tools.run_tool(
                        history_id=history.get("id"),
                        tool_id=prog,
                        tool_inputs=tool_inputs)
                    b.history_fileid = outputs.get("outputs")[0].get("id")
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Bad input FASTA file format"
            else:
                b.status = BlastRun.ERROR
                b.message = "Wrong blast program %s" % (prog)
            b.save()
        else:
            b.status = BlastRun.ERROR
            # records is already a list; no need to re-wrap in list()
            b.message = "More than one record in the fasta file! %d" % (
                len(records))
    except Exception as e:
        logging.exception(str(e))
        b.status = BlastRun.ERROR
        b.message = str(e)
    b.save()
    time.sleep(30)