Example #1
0
def deletegalaxyworkflow(workflow_galaxyid):
    """
    Delete a workflow on the Galaxy server, on a best-effort basis.

    Any exception raised while connecting to Galaxy or while deleting the
    workflow is logged as a warning and swallowed, so this function never
    raises and always returns None.

    :param workflow_galaxyid: id of the workflow, passed as is to
                              ``workflows.delete_workflow``
    """
    try:
        connection = galaxy_connection()
        # NOTE(review): presumably turns off caching on the connection
        # object -- confirm against galaxy_connection()
        connection.nocache = True
        workflows_client = connection.workflows
        workflows_client.delete_workflow(workflow_galaxyid)
    except Exception as err:
        logging.warning("Problem while deleting workflow: %s", err)
Example #2
0
def deletegalaxyworkflow(workflow_galaxyid):
    """
    Delete a workflow on the Galaxy server, on a best-effort basis.

    Any exception raised while connecting to Galaxy or while deleting the
    workflow is logged as a warning and swallowed, so this function never
    raises and always returns None.

    :param workflow_galaxyid: id of the workflow, passed as is to
                              ``workflows.delete_workflow``
    """
    try:
        connection = galaxy_connection()
        # NOTE(review): presumably turns off caching on the connection
        # object -- confirm against galaxy_connection()
        connection.nocache = True
        workflows_client = connection.workflows
        workflows_client.delete_workflow(workflow_galaxyid)
    except Exception as err:
        logging.warning("Problem while deleting workflow: %s", err)
Example #3
0
def initializeworkspacejob(historyid):
    """
    Store the current Galaxy state of a history on its WorkspaceHistory.

    The history is fetched twice from Galaxy (once with ``contents=True``,
    once without), both answers are serialized to JSON and saved on the
    ``WorkspaceHistory`` whose ``history`` equals ``historyid``.

    Nothing is caught here: errors from Galaxy, from the model lookup or
    from the JSON serialization propagate to the caller.

    :param historyid: Galaxy history id, also used to look up the workspace
    """
    connection = galaxy_connection()
    # NOTE(review): presumably turns off caching on the connection object --
    # confirm against galaxy_connection()
    connection.nocache = True

    # Both Galaxy calls are made before the database is touched
    contents = connection.histories.show_history(historyid, contents=True)
    info = connection.histories.show_history(historyid)

    workspace = WorkspaceHistory.objects.get(history=historyid)
    workspace.history_content_json = json.dumps(contents)
    workspace.history_info_json = json.dumps(info)
    workspace.save()
Example #4
0
def deletegalaxyhistory(historyid):
    """
    Celery task that deletes (and purges) a history on the Galaxy server
    in the background.

    The deletion is best effort: any exception is logged as a warning and
    swallowed, so the function always returns None.

    :param historyid: id of the history, passed as is to
                      ``histories.delete_history`` with ``purge=True``
    """
    start_message = "Deleting history %s" % (historyid)
    logging.info(start_message)
    try:
        connection = galaxy_connection()
        # NOTE(review): presumably turns off caching on the connection
        # object -- confirm against galaxy_connection()
        connection.nocache = True
        histories_client = connection.histories
        histories_client.delete_history(historyid, purge=True)
    except Exception as err:
        logging.warning("Problem while deleting history: %s", err)
Example #5
0
def deletegalaxyhistory(historyid):
    """
    Celery task that deletes (and purges) a history on the Galaxy server
    in the background.

    The deletion is best effort: any exception is logged as a warning and
    swallowed, so the function always returns None.

    :param historyid: id of the history, passed as is to
                      ``histories.delete_history`` with ``purge=True``
    """
    start_message = "Deleting history %s" % (historyid)
    logging.info(start_message)
    try:
        connection = galaxy_connection()
        # NOTE(review): presumably turns off caching on the connection
        # object -- confirm against galaxy_connection()
        connection.nocache = True
        histories_client = connection.histories
        histories_client.delete_history(historyid, purge=True)
    except Exception as err:
        logging.warning("Problem while deleting history: %s", err)
Example #6
0
def deleteoldgalaxyworkflows():
    """
    Delete old workflows that no workspace refers to.

    Candidates are the ``Workflow`` rows whose category is not 'base', that
    are not flagged as deleted and whose ``date`` is at least one day old.
    A candidate with no ``WorkspaceHistory`` pointing at it is deleted on the
    Galaxy server first, then from the database.

    The first error stops the current pass: it is logged as a warning and
    swallowed. Workflows already deleted stay deleted.
    """
    logger.info("Start old workflow deletion task")
    datecutoff = datetime.now() - timedelta(days=1)
    # Created on first use only, and then shared by every deletion of the pass
    galaxycon = None
    try:
        candidates = Workflow.objects.exclude(category='base').filter(
            date__lte=datecutoff).filter(deleted=False)
        for w in candidates:
            # Still associated with a workspace: keep it
            if WorkspaceHistory.objects.filter(workflow=w.id).exists():
                continue
            if galaxycon is None:
                galaxycon = galaxy_connection()
                galaxycon.nocache = True
            # Galaxy first: if this fails the database row is kept
            galaxycon.workflows.delete_workflow(w.id_galaxy)
            w.delete()
    except Exception as e:
        # The message used to say "history", a copy/paste left-over
        logging.warning("Problem while deleting workflow: %s", e)
    logger.info("Old workflow deletion task finished")
Example #7
0
def deleteoldgalaxyhistory():
    """
    Flag old finished workspaces as deleted and remove them from Galaxy.

    Every ``WorkspaceHistory`` that is finished, not yet flagged as deleted
    and created at least 14 days ago is processed:

    * its workflow, if any, is deleted on Galaxy and flagged as deleted;
    * the workspace itself is flagged as deleted;
    * its Galaxy history is deleted.

    The Galaxy deletions go through ``deletegalaxyworkflow`` and
    ``deletegalaxyhistory``, which open their own connection, so none is
    created here.
    """
    logger.info("Start old workspace deletion task")
    datecutoff = datetime.now() - timedelta(days=14)
    old_workspaces = WorkspaceHistory.objects.filter(deleted=False).filter(
        finished=True).filter(created_date__lte=datecutoff)
    for workspace in old_workspaces:
        if workspace.workflow is not None:
            deletegalaxyworkflow(workspace.workflow.id_galaxy)
            workspace.workflow.deleted = True
            workspace.workflow.save()
        # The database flag is set before the Galaxy history is removed
        workspace.deleted = True
        workspace.save()
        deletegalaxyhistory(workspace.history)
    logger.info("Old workspace deletion task finished")
Example #8
0
def deleteoldgalaxyworkflows():
    """
    Delete old workflows that no workspace refers to.

    Candidates are the ``Workflow`` rows whose category is not 'base', that
    are not flagged as deleted and whose ``date`` is at least one day old.
    A candidate with no ``WorkspaceHistory`` pointing at it is deleted on the
    Galaxy server first, then from the database.

    The first error stops the current pass: it is logged as a warning and
    swallowed. Workflows already deleted stay deleted.
    """
    logger.info("Start old workflow deletion task")
    datecutoff = datetime.now() - timedelta(days=1)
    # Created on first use only, and then shared by every deletion of the pass
    galaxycon = None
    try:
        candidates = Workflow.objects.exclude(category='base').filter(
            date__lte=datecutoff).filter(deleted=False)
        for w in candidates:
            # Still associated with a workspace: keep it
            if WorkspaceHistory.objects.filter(workflow=w.id).exists():
                continue
            if galaxycon is None:
                galaxycon = galaxy_connection()
                galaxycon.nocache = True
            # Galaxy first: if this fails the database row is kept
            galaxycon.workflows.delete_workflow(w.id_galaxy)
            w.delete()
    except Exception as e:
        # The message used to say "history", a copy/paste left-over
        logging.warning("Problem while deleting workflow: %s", e)
    logger.info("Old workflow deletion task finished")
Example #9
0
def updateworkspacestatus(historyid):
    """
    Refresh the stored Galaxy state of one workspace and notify its owner.

    The history content and info are fetched from Galaxy. If the matching
    ``WorkspaceHistory`` is monitored, not finished and not deleted, both are
    stored on it as JSON. The workspace is then considered finished when the
    history holds more than one dataset and either none of them is running,
    queued or new, or one of them is in error. In that case it is flagged as
    finished and, when it carries something that looks like an e-mail
    address, a notification is sent.

    Nothing is returned. Failures while talking to Galaxy or updating the
    workspace are logged as a warning and swallowed; failures while sending
    the e-mail are logged separately and do not prevent the final save.

    :param historyid: Galaxy id of the history (a string: it is concatenated
                      into the lock key)
    """
    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running.
    ## The lock is not released here: it is left to expire after
    ## LOCK_EXPIRE_SHORT, which also avoids too frequent refreshes.
    lock_id = "lock_ngphylo_workspacemonitoring_" + historyid
    if not cache.add(lock_id, "true", LOCK_EXPIRE_SHORT):
        return

    galaxycon = galaxy_connection()
    galaxycon.nocache = True
    finished = False
    error = False
    try:
        hc = galaxycon.histories.show_history(historyid, contents=True)
        hi = galaxycon.histories.show_history(historyid)
        w = WorkspaceHistory.objects.get(history=historyid)
        if w.monitored and not w.finished and not w.deleted:
            w.history_content_json = json.dumps(hc)
            w.history_info_json = json.dumps(hi)
            w.save()
            # A history with at most one dataset is never considered finished
            if len(hc) > 1:
                finished = True
                for dataset in hc:
                    state = dataset.get('state', '')
                    if ('running' in state or 'queued' in state
                            or 'new' in state):
                        finished = False
                    if 'error' in state:
                        # One failed dataset ends the monitoring, whatever
                        # the state of the other ones
                        error = True
                        finished = True
                        break
            if finished:
                w.finished = finished
                logging.warning("history %s finished? %r" %
                                (historyid, w.finished))
                if w.email and re.match(r"[^@]+@[^@]+\.[^@]+", w.email):
                    logging.warning("Sending EMail to %s", w.email)
                    try:
                        citation = "Lemoine F, Correia D, Lefort V, Doppelt-Azeroual O, Mareuil F, Cohen-Boulakia S, Gascuel O\n" \
                                   "NGPhylogeny.fr: new generation phylogenetic services for non-specialists.\n" \
                                   "Nucleic Acids Research 2019 (https://doi.org/10.1093/nar/gkz303).\n"
                        message = "Dear NGPhylogeny user, \n\n"
                        if error:
                            message = message + "Your NGPhylogeny job finished with errors.\n\n"
                        else:
                            message = message + "Your NGPhylogeny job finished successfuly.\n"
                        please = 'Please visit http://%s%s to check results\n\n' % (
                            "ngphylogeny.fr",
                            reverse('history_detail',
                                    kwargs={'history_id': historyid}))
                        message = message + please
                        message = message + "Thank you for using ngphylogeny.fr\n\n"
                        message = message + "NGPhylogeny.fr development team.\n\n"
                        message = message + citation

                        send_mail(
                            'NGPhylogeny.fr results',
                            message,
                            '*****@*****.**',
                            [w.email],
                            fail_silently=False,
                        )
                    except SMTPException as e:
                        logging.warning("Problem with smtp server : %s" % (e))
                    except Exception as e:
                        logging.warning(
                            "Unknown Problem while sending e-mail: %s" % (e))
            w.save()
    except Exception:
        # Not a bare "except:" so that SystemExit and KeyboardInterrupt still
        # propagate; the traceback is logged to tell a Galaxy outage from
        # any other failure (e.g. unknown workspace).
        logging.warning('Problem with Galaxy server, will retry later',
                        exc_info=True)
Example #10
0
def checkblastruns():
    """
    Every minute, check running pasteur blast runs.

    For each pending or running ``BlastRun`` of the PASTEUR server, the state
    of its output dataset is read from Galaxy:

    * 'ok': the blast XML is downloaded and parsed, the HSPs passing the
      e-value and coverage thresholds are stored as ``BlastSubject`` rows
      and a tree is built (the run is in error if no sequence is kept);
    * 'queued' / 'new': the run is pending;
    * 'running': the run is running;
    * anything else: the run is in error.

    When the run ends up finished or in error and carries something that
    looks like an e-mail address, a notification is sent.

    The first unexpected error stops the current pass: the run being
    processed (if any) is flagged in error with the exception text. The lock
    taken at the start is always released, whatever happens.
    """
    logger.info("Start pasteur blast task check")

    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running
    lock_id = "lock_ngphylo_blastmonitoring"
    if not cache.add(lock_id, "true", LOCK_EXPIRE):
        return

    # Run being processed when a failure occurs. It is None outside of an
    # iteration, so that a failure before the loop (connection, query) never
    # touches an unbound or already processed run.
    current = None
    try:
        galaxycon = galaxy_connection()
        galaxycon.nocache = True

        pending_runs = BlastRun.objects.filter(
            deleted=False, server=BlastRun.PASTEUR).filter(
                Q(status=BlastRun.PENDING) | Q(status=BlastRun.RUNNING))
        for b in pending_runs:
            current = b
            # State of the output file we want (blast XML)
            dataset = galaxycon.histories.show_dataset(b.history,
                                                       b.history_fileid)
            state = dataset.get('state')
            infos = dataset.get('misc_info')
            b.message = infos

            if state == 'ok':
                b.status = BlastRun.FINISHED
                blast_type = BlastRun.blast_type(BlastRun.PASTEUR, b.blastprog)
                translated = blast_type == 'blastx' or blast_type == 'tblastx'
                ## Download the result file from galaxy first...
                ## The temporary file is removed as soon as it is parsed.
                with tempfile.NamedTemporaryFile() as tmp_file:
                    galaxycon.datasets.download_dataset(b.history_fileid,
                                                        tmp_file.name, False)
                    query_seq_bk = b.query_seq
                    frame = 1
                    if translated:
                        frame = majorityQueryFrame(tmp_file.name)
                        b.query_seq = biofile.translate(
                            str(b.query_seq), frame)
                        b.save()

                    ms = PseudoMSA(b.query_id, b.query_seq, query_seq_bk,
                                   frame, blast_type)
                    query_len = float(len(b.query_seq))
                    with open(tmp_file.name, "r") as result_handle:
                        for blast_record in NCBIXML.parse(result_handle):
                            for alignment in blast_record.alignments:
                                for hsp in alignment.hsps:
                                    e_val = hsp.expect
                                    leng = float(hsp.align_length) / query_len
                                    if e_val < b.evalue and leng >= b.coverage:
                                        ms.add_hsp(
                                            biofile.newick_clean(
                                                alignment.title), hsp)

                if translated:
                    ms.crop_alignment(b.maxseqs)
                    b.query_seq = "".join(ms.query_seq)
                    b.save()

                nseq = 0
                for subject_id, seq, fullseq in ms.first_n_max_score_sequences(
                        b.maxseqs):
                    s = BlastSubject(subject_id=subject_id,
                                     subject_seq=seq,
                                     subject_fullseq=fullseq,
                                     blastrun=b)
                    s.save()
                    nseq += 1

                if nseq > 0:
                    b.tree = b.build_nj_tree()
                    b.status = BlastRun.FINISHED
                    b.save()
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Blast Search returned no results"
                    b.save()
            elif state == 'queued' or state == 'new':
                b.status = BlastRun.PENDING
            elif state == 'running':
                b.status = BlastRun.RUNNING
            else:
                b.status = BlastRun.ERROR
            b.save()

            if b.email is not None and re.match(
                    r"[^@]+@[^@]+\.[^@]+",
                    b.email) and (b.status == BlastRun.ERROR
                                  or b.status == BlastRun.FINISHED):
                try:
                    message = "Dear NGPhylogeny user, \n\n"
                    if b.status != b.FINISHED:
                        message = message + "Your NGPhylogeny BLAST job finished with errors.\n\n"
                    else:
                        message = message + "Your NGPhylogeny BLAST job finished successfuly.\n"
                    please = 'Please visit http://%s%s to check results\n\n' % (
                        "ngphylogeny.fr",
                        reverse('blast_view', kwargs={'pk': b.id}))
                    message = message + please
                    message = message + "Thank you for using ngphylogeny.fr\n\n"
                    message = message + "NGPhylogeny.fr development team.\n"
                    send_mail(
                        'NGPhylogeny.fr BLAST results',
                        message,
                        '*****@*****.**',
                        [b.email],
                        fail_silently=False,
                    )
                except SMTPException as e:
                    logging.warning("Problem with smtp server : %s" % (e))
                except Exception as e:
                    logging.warning(
                        "Unknown Problem while sending e-mail: %s" % (e))
            current = None
    except Exception as e:
        if current is not None:
            current.status = BlastRun.ERROR
            current.message = str(e)
            current.save()
        logger.info("Error while checking blast run: %s" % (e))
        logging.exception("message")
    finally:
        # Always give the lock back, even if the error handling itself fails
        cache.delete(lock_id)

    logger.info("Pasteur blast runs checked")
Example #11
0
def launch_pasteur_blast(blastrunid, sequence, prog, db, evalue, coverage,
                         maxseqs):
    """
    Celery task that will launch a blast on the pasteur Galaxy Server

    The FASTA text must hold exactly one record. A Galaxy history is created,
    the search parameters are stored on the ``BlastRun``, the query is
    uploaded and the blast tool is started; the id of its first output is
    kept in ``history_fileid``.

    Every failure (wrong number of records, wrong alphabet, unknown program,
    bad FASTA, any exception) is reported by setting the run status to ERROR
    with an explanatory message. Nothing is returned and nothing is raised,
    apart from the initial lookup of the run.

    :param blastrunid: primary key of the ``BlastRun`` to update
    :param sequence: query, as FASTA text
    :param prog: Galaxy tool id of the blast program
    :param db: blast database, passed to the tool as "db_opts|database"
    :param evalue: e-value cutoff, stored and passed to the tool
    :param coverage: coverage threshold, only stored on the run here
    :param maxseqs: maximum number of sequences, only stored on the run here
    """
    logging.info("Blasting %s with %s on %s" % (sequence, prog, db))
    b = BlastRun.objects.get(id=blastrunid)
    try:
        fasta_io = StringIO(sequence)
        records = list(SeqIO.parse(fasta_io, "fasta"))
        if len(records) == 1:
            galaxycon = galaxy_connection()
            galaxycon.nocache = True
            history = galaxycon.histories.create_history(name="BlastXplorer")

            b.history = history.get("id")
            b.query_id = biofile.cleanseqname(records[0].id)
            b.query_seq = records[0].seq
            b.evalue = evalue
            b.coverage = coverage
            b.database = db
            b.blastprog = prog
            b.maxseqs = maxseqs
            b.status = BlastRun.PENDING
            b.save()

            blast_type = BlastRun.blast_type(BlastRun.PASTEUR, prog)
            blast_inputtype = BlastRun.blast_inputtype(BlastRun.PASTEUR, prog)

            # We check alphabet of given sequence
            if ((blast_inputtype == "nt" and not biofile.check_nt(b.query_seq))
                    or (blast_inputtype == "aa"
                        and not biofile.check_aa(b.query_seq))):
                b.status = BlastRun.ERROR
                b.message = "The given sequence has the wrong alphabet. Program %s expects %s sequence" % (
                    blast_type, blast_inputtype)
            elif blast_type is not None:
                # Text mode: the sequence is text (it was just fed to
                # StringIO) and the default binary mode rejects str on
                # Python 3. The file is removed when the block is left.
                with tempfile.NamedTemporaryFile(mode="w+") as tmp_file:
                    tmp_file.write(sequence)
                    # Make the content visible to readers of tmp_file.name
                    tmp_file.flush()
                    if biofile.is_fasta_one_seq(tmp_file.name):
                        ## Upload input query file to galaxy
                        outputs = galaxycon.tools.upload_file(
                            path=tmp_file.name,
                            file_name="blastinput.fasta",
                            history_id=history.get("id"),
                            file_type="fasta")
                        file_id = outputs.get('outputs')[0].get('id')
                        ## Configuring job
                        tool_inputs = inputs()
                        tool_inputs.set_dataset_param("query", file_id)
                        tool_inputs.set_param("db_opts|database", db)
                        tool_inputs.set_param("blast_type", blast_type)
                        tool_inputs.set_param("evalue_cutoff", evalue)
                        tool_inputs.set_param("output|out_format", "5")
                        ## Running blast job
                        outputs = galaxycon.tools.run_tool(
                            history_id=history.get("id"),
                            tool_id=prog,
                            tool_inputs=tool_inputs)
                        b.history_fileid = outputs.get("outputs")[0].get("id")
                    else:
                        b.status = BlastRun.ERROR
                        b.message = "Bad input FASTA file format"
            else:
                b.status = BlastRun.ERROR
                b.message = "Wrong blast program %s" % (prog)
            b.save()
        else:
            b.status = BlastRun.ERROR
            if records:
                b.message = "More than one record in the fasta file! %d" % (
                    len(records))
            else:
                # This branch is also reached when nothing could be parsed
                b.message = "No record found in the fasta file"
    except Exception as e:
        logging.exception(str(e))
        b.status = BlastRun.ERROR
        b.message = str(e)
    b.save()
    # NOTE(review): presumably throttles successive submissions -- confirm
    time.sleep(30)
Example #12
0
def monitorworkspace(historyid):
    """
    Celery task that will monitor galaxy workspace

    It will update content of the workspace django model every 10 seconds

    It will wait for end of execution of all jobs
    and send a mail at the end, if the mail has been
    given by the user.

    The monitoring ends when no dataset of the history is running, queued or
    new, or as soon as one dataset is in error, whatever its position in the
    history. A failure while polling is logged and followed by a one minute
    pause before the next attempt.

    this task is launched by workspace.views.HistoryDetailView
    only once, when the WorkspaceHistory is marked as "notmonitored"

    :param historyid: Galaxy id of the history to monitor
    """
    galaxycon = galaxy_connection()
    galaxycon.nocache = True
    finished = False
    error = False

    while not finished:
        try:
            hc = galaxycon.histories.show_history(historyid, contents=True)
            hi = galaxycon.histories.show_history(historyid)
            w = WorkspaceHistory.objects.get(history=historyid)
            w.history_content_json = json.dumps(hc)
            w.history_info_json = json.dumps(hi)
            w.save()

            finished = True
            for dataset in hc:
                state = dataset.get('state', '')
                if ('running' in state or 'queued' in state
                        or 'new' in state):
                    finished = False
                if 'error' in state:
                    # Stop at the first failed dataset: without the break a
                    # later running dataset would reset "finished" and the
                    # outcome would depend on the order of the datasets.
                    error = True
                    finished = True
                    break
        except Exception:
            # Not a bare "except:" so that SystemExit and KeyboardInterrupt
            # still stop the worker; the traceback tells a Galaxy outage
            # from any other failure.
            logging.warning('Problem with Galaxy server, waiting 1 minute',
                            exc_info=True)
            time.sleep(60)

        if not finished:
            time.sleep(10)

    # "finished" can only be True once an iteration got past the lookup of
    # the workspace, so w is bound here
    w.finished = True
    w.save()
    logging.warning("history finished? %r" % (w.finished))

    if w.email and re.match(r"[^@]+@[^@]+\.[^@]+", w.email):
        try:
            message = "Dear NGPhylogeny user, \n\n"
            if error:
                message = message + "Your NGPhylogeny job finished with errors.\n\n"
            else:
                message = message + "Your NGPhylogeny job finished successfuly.\n"
            please = 'Please visit http://%s%s to check results\n\n' % (
                "ngphylogeny.fr",
                reverse('history_detail', kwargs={'history_id': historyid}))
            message = message + please
            message = message + "Thank you for using ngphylogeny.fr\n\n"
            message = message + "NGPhylogeny.fr development team.\n"
            send_mail(
                'NGPhylogeny.fr results',
                message,
                '*****@*****.**',
                [w.email],
                fail_silently=False,
            )
            print(message)
        except SMTPException as e:
            logging.warning("Problem with smtp server : %s" % (e))
        except Exception as e:
            logging.warning("Unknown Problem while sending e-mail: %s" % (e))
Example #13
0
def checkblastruns():
    """
    Every minute, check running pasteur blast runs.

    For each pending or running ``BlastRun`` of the PASTEUR server, the state
    of its output dataset is read from Galaxy:

    * 'ok': the blast XML is downloaded and parsed, the HSPs passing the
      e-value and coverage thresholds are stored as ``BlastSubject`` rows
      and a tree is built (the run is in error if no sequence is kept);
    * 'queued' / 'new': the run is pending;
    * 'running': the run is running;
    * anything else: the run is in error.

    When the run ends up finished or in error and carries something that
    looks like an e-mail address, a notification is sent.

    The first unexpected error stops the current pass: the run being
    processed (if any) is flagged in error with the exception text. The lock
    taken at the start is always released, whatever happens.
    """
    logger.info("Start pasteur blast task check")

    ## To be sure that the task is not reexecuted in parallel while
    ## the previous one is still running
    lock_id = "lock_ngphylo_blastmonitoring"
    if not cache.add(lock_id, "true", LOCK_EXPIRE):
        return

    # Run being processed when a failure occurs. It is None outside of an
    # iteration, so that a failure before the loop (connection, query) never
    # touches an unbound or already processed run.
    current = None
    try:
        galaxycon = galaxy_connection()
        galaxycon.nocache = True

        pending_runs = BlastRun.objects.filter(
            deleted=False, server=BlastRun.PASTEUR).filter(
                Q(status=BlastRun.PENDING) | Q(status=BlastRun.RUNNING))
        for b in pending_runs:
            current = b
            # State of the output file we want (blast XML)
            dataset = galaxycon.histories.show_dataset(b.history,
                                                       b.history_fileid)
            state = dataset.get('state')
            infos = dataset.get('misc_info')
            b.message = infos

            if state == 'ok':
                b.status = BlastRun.FINISHED
                blast_type = BlastRun.blast_type(BlastRun.PASTEUR, b.blastprog)
                translated = blast_type == 'blastx' or blast_type == 'tblastx'
                ## Download the result file from galaxy first...
                ## The temporary file is removed as soon as it is parsed.
                with tempfile.NamedTemporaryFile() as tmp_file:
                    galaxycon.datasets.download_dataset(b.history_fileid,
                                                        tmp_file.name, False)
                    query_seq_bk = b.query_seq
                    frame = 1
                    if translated:
                        frame = majorityQueryFrame(tmp_file.name)
                        b.query_seq = biofile.translate(
                            str(b.query_seq), frame)
                        b.save()

                    ms = PseudoMSA(b.query_id, b.query_seq, query_seq_bk,
                                   frame, blast_type)
                    query_len = float(len(b.query_seq))
                    with open(tmp_file.name, "r") as result_handle:
                        for blast_record in NCBIXML.parse(result_handle):
                            for alignment in blast_record.alignments:
                                for hsp in alignment.hsps:
                                    e_val = hsp.expect
                                    leng = float(hsp.align_length) / query_len
                                    if e_val < b.evalue and leng >= b.coverage:
                                        ms.add_hsp(
                                            biofile.newick_clean(
                                                alignment.title), hsp)

                if translated:
                    ms.crop_alignment(b.maxseqs)
                    b.query_seq = "".join(ms.query_seq)
                    b.save()

                nseq = 0
                for subject_id, seq, fullseq in ms.first_n_max_score_sequences(
                        b.maxseqs):
                    s = BlastSubject(subject_id=subject_id,
                                     subject_seq=seq,
                                     subject_fullseq=fullseq,
                                     blastrun=b)
                    s.save()
                    nseq += 1

                if nseq > 0:
                    b.tree = b.build_nj_tree()
                    b.status = BlastRun.FINISHED
                    b.save()
                else:
                    b.status = BlastRun.ERROR
                    b.message = "Blast Search returned no results"
                    b.save()
            elif state == 'queued' or state == 'new':
                b.status = BlastRun.PENDING
            elif state == 'running':
                b.status = BlastRun.RUNNING
            else:
                b.status = BlastRun.ERROR
            b.save()

            if b.email is not None and re.match(
                    r"[^@]+@[^@]+\.[^@]+",
                    b.email) and (b.status == BlastRun.ERROR
                                  or b.status == BlastRun.FINISHED):
                try:
                    message = "Dear NGPhylogeny user, \n\n"
                    if b.status != b.FINISHED:
                        message = message + "Your NGPhylogeny BLAST job finished with errors.\n\n"
                    else:
                        message = message + "Your NGPhylogeny BLAST job finished successfuly.\n"
                    please = 'Please visit http://%s%s to check results\n\n' % (
                        "ngphylogeny.fr",
                        reverse('blast_view', kwargs={'pk': b.id}))
                    message = message + please
                    message = message + "Thank you for using ngphylogeny.fr\n\n"
                    message = message + "NGPhylogeny.fr development team.\n"
                    send_mail(
                        'NGPhylogeny.fr BLAST results',
                        message,
                        '*****@*****.**',
                        [b.email],
                        fail_silently=False,
                    )
                except SMTPException as e:
                    logging.warning("Problem with smtp server : %s" % (e))
                except Exception as e:
                    logging.warning(
                        "Unknown Problem while sending e-mail: %s" % (e))
            current = None
    except Exception as e:
        if current is not None:
            current.status = BlastRun.ERROR
            current.message = str(e)
            current.save()
        logger.info("Error while checking blast run: %s" % (e))
        logging.exception("message")
    finally:
        # Always give the lock back, even if the error handling itself fails
        cache.delete(lock_id)

    logger.info("Pasteur blast runs checked")
Example #14
0
def launch_pasteur_blast(blastrunid, sequence, prog, db, evalue, coverage, maxseqs):
    """
    Celery task that will launch a blast on the pasteur Galaxy Server

    The FASTA text must hold exactly one record. A Galaxy history is created,
    the search parameters are stored on the ``BlastRun``, the query is
    uploaded and the blast tool is started; the id of its first output is
    kept in ``history_fileid``.

    Every failure (wrong number of records, wrong alphabet, unknown program,
    bad FASTA, any exception) is reported by setting the run status to ERROR
    with an explanatory message. Nothing is returned and nothing is raised,
    apart from the initial lookup of the run.

    :param blastrunid: primary key of the ``BlastRun`` to update
    :param sequence: query, as FASTA text
    :param prog: Galaxy tool id of the blast program
    :param db: blast database, passed to the tool as "db_opts|database"
    :param evalue: e-value cutoff, stored and passed to the tool
    :param coverage: coverage threshold, only stored on the run here
    :param maxseqs: maximum number of sequences, only stored on the run here
    """
    logging.info("Blasting %s with %s on %s" % (sequence, prog, db))
    b = BlastRun.objects.get(id=blastrunid)
    try:
        fasta_io = StringIO(sequence)
        records = list(SeqIO.parse(fasta_io, "fasta"))
        if len(records) == 1:
            galaxycon = galaxy_connection()
            galaxycon.nocache = True
            history = galaxycon.histories.create_history(name="BlastXplorer")

            b.history = history.get("id")
            b.query_id = biofile.cleanseqname(records[0].id)
            b.query_seq = records[0].seq
            b.evalue = evalue
            b.coverage = coverage
            b.database = db
            b.blastprog = prog
            b.maxseqs = maxseqs
            b.status = BlastRun.PENDING
            b.save()

            blast_type = BlastRun.blast_type(BlastRun.PASTEUR, prog)
            blast_inputtype = BlastRun.blast_inputtype(BlastRun.PASTEUR, prog)

            # We check alphabet of given sequence
            if ((blast_inputtype == "nt" and not biofile.check_nt(b.query_seq)) or
                    (blast_inputtype == "aa" and not biofile.check_aa(b.query_seq))):
                b.status = BlastRun.ERROR
                b.message = "The given sequence has the wrong alphabet. Program %s expects %s sequence" % (
                    blast_type, blast_inputtype)
            elif blast_type is not None:
                # Text mode: the sequence is text (it was just fed to
                # StringIO) and the default binary mode rejects str on
                # Python 3. The file is removed when the block is left.
                with tempfile.NamedTemporaryFile(mode="w+") as tmp_file:
                    tmp_file.write(sequence)
                    # Make the content visible to readers of tmp_file.name
                    tmp_file.flush()
                    if biofile.is_fasta_one_seq(tmp_file.name):
                        ## Upload input query file to galaxy
                        ## (this part used to be indented with tabs, which
                        ## Python 3 rejects when mixed with spaces)
                        outputs = galaxycon.tools.upload_file(
                            path=tmp_file.name,
                            file_name="blastinput.fasta",
                            history_id=history.get("id"),
                            file_type="fasta")
                        file_id = outputs.get('outputs')[0].get('id')
                        ## Configuring job
                        tool_inputs = inputs()
                        tool_inputs.set_dataset_param("query", file_id)
                        tool_inputs.set_param("db_opts|database", db)
                        tool_inputs.set_param("blast_type", blast_type)
                        tool_inputs.set_param("evalue_cutoff", evalue)
                        tool_inputs.set_param("output|out_format", "5")
                        ## Running blast job
                        outputs = galaxycon.tools.run_tool(
                            history_id=history.get("id"),
                            tool_id=prog,
                            tool_inputs=tool_inputs)
                        b.history_fileid = outputs.get("outputs")[0].get("id")
                    else:
                        b.status = BlastRun.ERROR
                        b.message = "Bad input FASTA file format"
            else:
                b.status = BlastRun.ERROR
                b.message = "Wrong blast program %s" % (prog)
            b.save()
        else:
            b.status = BlastRun.ERROR
            if records:
                b.message = "More than one record in the fasta file! %d" % (
                    len(records))
            else:
                # This branch is also reached when nothing could be parsed
                b.message = "No record found in the fasta file"
    except Exception as e:
        logging.exception(str(e))
        b.status = BlastRun.ERROR
        b.message = str(e)
    b.save()
    # NOTE(review): presumably throttles successive submissions -- confirm
    time.sleep(30)