Esempio n. 1
0
    def submit_job(self, job):
        """Give job a fresh id, persist it and enqueue it without blocking.

        job is a dict-like object that must already carry a 'created' value
        offering strftime(). A new uuid1 is stored under job['id'] in place.

        When the queue is full the job's status is set to "error" and the
        Full exception is re-raised. Otherwise the status becomes "queued",
        annotated with the queue length when q.qsize() is available, and the
        administrator is warned once that length reaches QUEUE_WARN_SIZE.

        Returns the generated uuid.
        """
        job_uuid = uuid.uuid1()
        job['id'] = job_uuid
        created_at = job['created'].strftime('%Y.%m.%d %H:%M:%S')
        print("Job %s created %s" % (job['id'], created_at))
        create_job_in_db(job)

        # Non-blocking put: a saturated queue is reported instead of waited on.
        try:
            q.put(job, block=False)
        except Full:
            traceback.print_stack()
            traceback.print_exc()
            update_job_status(job['id'], "error")
            raise

        # Record the job as queued, noting the backlog when it can be measured.
        try:
            backlog = q.qsize()
            if backlog >= QUEUE_WARN_SIZE:
                adminEmailer.warn("Queue is getting too long. There are currently %d items in the queue." % backlog)
            update_job_status(job['id'], "queued", "queue length %d" % backlog)
        except NotImplementedError:
            # Raised when the queue cannot report its size; queue without it.
            print("q.qsize not supported")
            update_job_status(job['id'], "queued")
        return job_uuid
Esempio n. 2
0
    def submit_job(self, job):
        """Give job a fresh id, persist it and enqueue it without blocking.

        job is a dict-like object that must already carry a 'created' value
        offering strftime(). A new uuid1 is stored under job['id'] in place.

        When the queue is full the job's status is set to "error" and the
        Full exception is re-raised. Otherwise the status becomes "queued",
        annotated with the queue length when q.qsize() is available, and the
        administrator is warned once that length reaches QUEUE_WARN_SIZE.

        Returns the generated uuid.
        """
        job_uuid = uuid.uuid1()
        job['id'] = job_uuid
        created_at = job['created'].strftime('%Y.%m.%d %H:%M:%S')
        print("Job %s created %s" % (job['id'], created_at))
        create_job_in_db(job)

        # Non-blocking put: a saturated queue is reported instead of waited on.
        try:
            q.put(job, block=False)
        except Full:
            traceback.print_stack()
            traceback.print_exc()
            update_job_status(job['id'], "error")
            raise

        # Record the job as queued, noting the backlog when it can be measured.
        try:
            backlog = q.qsize()
            if backlog >= QUEUE_WARN_SIZE:
                adminEmailer.warn("Queue is getting too long. There are currently %d items in the queue." % backlog)
            update_job_status(job['id'], "queued", "queue length %d" % backlog)
        except NotImplementedError:
            # Raised when the queue cannot report its size; queue without it.
            print("q.qsize not supported")
            update_job_status(job['id'], "queued")
        return job_uuid
def run(config, job_uuid, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet=10, viral=False):
    """Run weeder and miRvestigator for one job and store the results.

    Pipeline: map the job's genes to entrez ids, look up their sequences in
    the species' "p3utrSeqs_<ucsc_name>.csv" file, write them to a temporary
    FASTA file, run weeder on it, keep the motifs of the requested sizes,
    score them with miRvestigator and store motifs and scores. The job status
    is updated along the way via update_job_status.

    Args:
        config: object with get(section, option); the 'General' options
            'data_dir', 'tmp_dir' and 'pssm_images_dir' are read.
        job_uuid: identifier of the job; used for status updates, result
            storage and PSSM image file names.
        genes: ignored; the gene list is re-read via map_genes_to_entrez_ids.
        geneId: gene identifier type, passed to map_genes_to_entrez_ids.
        seedModels: passed to miRvestigator as seedModel.
        wobble: passed to miRvestigator as wobble.
        cut: converted to float and passed to miRvestigator as wobbleCut.
        motifSizes: container of ints; a motif is kept when the length of its
            name is 6 or 8 and that length is in motifSizes.
        jobName: unused here.
        mirbase_species: species key for get_species_by_mirbase_id; also
            passed on to the gene mapping and to miRvestigator.
        bgModel: '3p' selects the species' weeder model name unchanged; any
            other value selects that name with a trailing '3P' removed.
        topRet: unused here.
        viral: passed to miRvestigator as viral.

    Returns:
        True when the job completed, False when no sequence was found for any
        gene (the job status is then set to "error").
    """
    species = get_species_by_mirbase_id(mirbase_species)
    weeder_model = species['weeder']
    if bgModel != '3p' and weeder_model.endswith('3P'):
        # Remove exactly the '3P' suffix. str.rstrip('3P') strips any run of
        # trailing '3'/'P' characters and would mangle a name such as 'XP3P'.
        weeder_model = weeder_model[:-2]
    bgModel = weeder_model
    sequence_file = os.path.join(config.get('General', 'data_dir'),
                                 "p3utrSeqs_" + species['ucsc_name'] + ".csv")

    cut = float(cut)

    # translate gene identifiers to entrez IDs
    print("translating gene identifiers from %s to entrez IDs" % (geneId))
    genes = map_genes_to_entrez_ids(job_uuid, geneId, mirbase_species)
    print("genes = " + str(genes))

    # 1. Read in sequences: first CSV column is the id, second the sequence.
    seqs = {}
    with open(sequence_file, 'r') as seq_file:
        for line in seq_file:
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            fields = line.split(',')
            seqs[fields[0].upper()] = fields[1]

    # 2. Get sequences for each target
    miRSeqs = dict((gene, seqs[gene]) for gene in genes if gene in seqs)

    # if there are no matching sequences, bail out w/ a reasonable error message.
    if not miRSeqs:
        print("no matching sequences found for genes in job " + str(job_uuid))
        update_job_status(job_uuid, "error", "No sequences found for the genes entered.")
        return False

    # record whether a sequence was found for each gene
    # previously stored when job was created (create_job_in_db)
    set_genes_annotated(job_uuid, miRSeqs)

    # 3. Make a FASTA file
    fasta_dir = os.path.join(config.get('General', 'tmp_dir'), 'fasta')
    if not os.path.exists(fasta_dir):
        os.makedirs(fasta_dir)
    curRunNum = randint(0, 1000000)
    fasta_fname = os.path.join(fasta_dir, 'tmp' + str(curRunNum) + '.fasta')

    try:
        with open(fasta_fname, 'w') as fastaFile:
            for seq in miRSeqs:
                fastaFile.write('>' + str(seq) + '\n' + str(miRSeqs[seq]) + '\n')

        # 4. Run weeder
        print('Running weeder!')
        update_job_status(job_uuid, "running weeder")
        all_pssms = weeder(config,
                           seqFile=fasta_fname,
                           percTargets=50,
                           revComp=False,
                           bgModel=bgModel)

        # 4a. Take only selected size motifs
        weederPSSMs1 = []
        for pssm1 in all_pssms:
            name = pssm1.getName()
            if len(name) in (6, 8) and len(name) in motifSizes:
                png_path = os.path.join(config.get('General', 'pssm_images_dir'),
                                        str(job_uuid) + '_' + name + '.png')
                # keep an independent copy; the original is handed to plotPssm
                weederPSSMs1.append(deepcopy(pssm1))
                plotPssm(pssm1, png_path)
            print("pssm name = " + name)

        # 5. Run miRvestigator HMM
        update_job_status(job_uuid, "computing miRvestigator HMM")
        mV = miRvestigator(config, weederPSSMs1, seqs.values(),
                           seedModel=seedModels,
                           minor=True,
                           p5=True, p3=True,
                           wobble=wobble, wobbleCut=cut,
                           textOut=False,
                           species=mirbase_species,
                           viral=viral)
    finally:
        # 6. Clean-up after yerself, even when a step above failed. Files that
        # were never created are skipped so the original error is not masked.
        for suffix in ('', '.wee', '.mix', '.html'):
            tmp_path = fasta_fname + suffix
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # 7. write output to database
    update_job_status(job_uuid, "compiling results")

    for pssm in weederPSSMs1:
        motif_id = store_motif(job_uuid, pssm)
        scores = mV.getScoreList(pssm.getName())
        store_mirvestigator_scores(motif_id, scores)

    update_job_status(job_uuid, "done")
    return True
Esempio n. 4
0
def start_worker(id, q):
    """Process jobs taken from q until SHUTDOWN_FLAG is received.

    Each job is a dict whose 'id', 'genes', 'geneId', 'wobble', 'cut',
    'jobName', 'topRet', 'species', 'notify_mail', 'bgModel' and 'viral'
    entries are read, plus the optional 's6'/'s7'/'s8' and 'm6'/'m8' entries.
    The parsed parameters are handed to mirv_worker.run.

    Any exception raised while handling a job (including a malformed job
    dict) is logged, the job is marked "error", the administrator is warned
    and, when a notification address is known, the submitter is notified.
    The worker then carries on with the next job.

    Args:
        id: integer worker number, used in log and status messages.
        q: queue providing a blocking get().
    """
    print("worker %d started" % (id))
    while True:
        job = q.get()
        if job == SHUTDOWN_FLAG:
            break

        # Defaults so the error handler works even when the job fails before
        # these values have been parsed.
        job_id = None
        notify_mail = None
        jobName = None

        try:
            job_id = job['id']
            update_job_status(job_id, "started on worker %d" % (id))
            print("worker %d computing job %s." % (id, job_id))

            # parse params out of job; notification details first so that a
            # later parse failure can still be reported to the submitter
            print(job)
            notify_mail = job['notify_mail']
            jobName = job['jobName']
            genes = job['genes']
            geneId = job['geneId']
            wobble = (job['wobble'] == 'yes')
            cut = float(job['cut'])
            topRet = job['topRet']
            mirbase_species = job['species']
            bgModel = job['bgModel']
            viral = (job['viral'] == 'True')

            # condense seed models and motif sizes into arrays of ints
            seedModels = [int(job[s]) for s in ['s6', 's7', 's8'] if s in job and job[s]]
            motifSizes = [int(job[m]) for m in ['m6', 'm8'] if m in job and job[m]]

            # run the job
            r = mirv_worker.run(job_id, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet, viral)

            # notify on success
            if r:
                print("worker %d finished job %s." % (id, job_id))
                if notify_mail:
                    adminEmailer.notify_complete(notify_mail.split(","), str(job_id), jobName)
            else:
                print("worker %d, job %s failed." % (id, job_id))

        except Exception:
            print("Exception in mirv_worker %d on job %s." % (id, str(job_id)))
            traceback.print_stack()
            traceback.print_exc()
            # Each reporting step is best-effort: a failure in one must not
            # prevent the others or stop the worker.
            try:
                update_job_status(job_id, 'error')
            except Exception:
                traceback.print_stack()
                traceback.print_exc()
            try:
                # format_stack() returns a list of lines; join them so the
                # message contains readable text rather than a list repr.
                adminEmailer.warn(error_msg_template % (id, str(job_id), "".join(traceback.format_stack()), traceback.format_exc(),))
            except Exception:
                traceback.print_stack()
                traceback.print_exc()
            try:
                if notify_mail:
                    # recipients, job_uuid, job_name
                    adminEmailer.notify_error(notify_mail.split(","), str(job_id), jobName)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

    print("worker %d exiting." % (id))
Esempio n. 5
0
def run(job_uuid, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet=10, viral=False):
    """Run weeder and miRvestigator for one job and store the results.

    Pipeline: map the job's genes to entrez ids, look up their sequences in
    the species' "p3utrSeqs_<ucsc_name>.csv" file under conf.data_dir, write
    them to a temporary FASTA file under conf.tmp_dir, run weeder on it, keep
    the motifs of the requested sizes, score them with miRvestigator and
    store motifs and scores. The job status is updated along the way via
    update_job_status.

    Args:
        job_uuid: identifier of the job; used for status updates, result
            storage and PSSM image file names.
        genes: ignored; the gene list is re-read via map_genes_to_entrez_ids.
        geneId: gene identifier type, passed to map_genes_to_entrez_ids.
        seedModels: passed to miRvestigator as seedModel.
        wobble: passed to miRvestigator as wobble.
        cut: converted to float and passed to miRvestigator as wobbleCut.
        motifSizes: container of ints; a motif is kept when the length of its
            name is 6 or 8 and that length is in motifSizes.
        jobName: unused here.
        mirbase_species: species key for get_species_by_mirbase_id; also
            passed on to the gene mapping and to miRvestigator.
        bgModel: '3p' selects the species' weeder model name unchanged; any
            other value selects that name with a trailing '3P' removed.
        topRet: unused here.
        viral: passed to miRvestigator as viral.

    Returns:
        True when the job completed, False when no sequence was found for any
        gene (the job status is then set to "error").
    """
    species = get_species_by_mirbase_id(mirbase_species)
    weeder_model = species['weeder']
    if bgModel != '3p' and weeder_model.endswith('3P'):
        # Remove exactly the '3P' suffix. str.rstrip('3P') strips any run of
        # trailing '3'/'P' characters and would mangle a name such as 'XP3P'.
        weeder_model = weeder_model[:-2]
    bgModel = weeder_model
    sequence_file = os.path.join(conf.data_dir,
                                 "p3utrSeqs_" + species['ucsc_name'] + ".csv")

    cut = float(cut)

    # translate gene identifiers to entrez IDs
    print("translating gene identifiers from %s to entrez IDs" % (geneId))
    genes = map_genes_to_entrez_ids(job_uuid, geneId, mirbase_species)
    print("genes = " + str(genes))

    # 1. Read in sequences: first CSV column is the id, second the sequence.
    seqs = {}
    with open(sequence_file, 'r') as seq_file:
        for line in seq_file:
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            fields = line.split(',')
            seqs[fields[0].upper()] = fields[1]

    # 2. Get sequences for each target
    miRSeqs = dict((gene, seqs[gene]) for gene in genes if gene in seqs)

    # if there are no matching sequences, bail out w/ a reasonable error message.
    if not miRSeqs:
        print("no matching sequences found for genes in job " + str(job_uuid))
        update_job_status(job_uuid, "error", "No sequences found for the genes entered.")
        return False

    # record whether a sequence was found for each gene
    # previously stored when job was created (create_job_in_db)
    set_genes_annotated(job_uuid, miRSeqs)

    # 3. Make a FASTA file
    fasta_dir = os.path.join(conf.tmp_dir, 'fasta')
    if not os.path.exists(fasta_dir):
        os.makedirs(fasta_dir)
    curRunNum = randint(0, 1000000)
    fasta_fname = os.path.join(fasta_dir, 'tmp' + str(curRunNum) + '.fasta')

    try:
        with open(fasta_fname, 'w') as fastaFile:
            for seq in miRSeqs:
                fastaFile.write('>' + str(seq) + '\n' + str(miRSeqs[seq]) + '\n')

        # 4. Run weeder
        print('Running weeder!')
        update_job_status(job_uuid, "running weeder")
        all_pssms = weeder(seqFile=fasta_fname, percTargets=50, revComp=False, bgModel=bgModel)

        # 4a. Take only selected size motifs
        weederPSSMs1 = []
        for pssm1 in all_pssms:
            name = pssm1.getName()
            if len(name) in (6, 8) and len(name) in motifSizes:
                png_path = os.path.join(conf.pssm_images_dir,
                                        str(job_uuid) + '_' + name + '.png')
                # keep an independent copy; the original is handed to plotPssm
                weederPSSMs1.append(deepcopy(pssm1))
                plotPssm(pssm1, png_path)
            print("pssm name = " + name)

        # 5. Run miRvestigator HMM
        update_job_status(job_uuid, "computing miRvestigator HMM")
        mV = miRvestigator(weederPSSMs1, seqs.values(),
                           seedModel=seedModels,
                           minor=True,
                           p5=True, p3=True,
                           wobble=wobble, wobbleCut=cut,
                           textOut=False,
                           species=mirbase_species,
                           viral=viral)
    finally:
        # NOTE: a disabled step that read mature miRNA ids from
        # 'mature.fa.gz' used to sit before the clean-up; it was commented
        # out and has been dropped.

        # 6. Clean-up after yerself, even when a step above failed. Files that
        # were never created are skipped so the original error is not masked.
        for suffix in ('', '.wee', '.mix', '.html'):
            tmp_path = fasta_fname + suffix
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # 7. write output to database
    update_job_status(job_uuid, "compiling results")

    for pssm in weederPSSMs1:
        motif_id = store_motif(job_uuid, pssm)
        scores = mV.getScoreList(pssm.getName())
        store_mirvestigator_scores(motif_id, scores)

    update_job_status(job_uuid, "done")
    return True
Esempio n. 6
0
def start_worker(id, q, config):
    """Process jobs taken from q until SHUTDOWN_FLAG is received.

    Each job is a dict whose 'id', 'genes', 'geneId', 'wobble', 'cut',
    'jobName', 'topRet', 'species', 'notify_mail', 'bgModel' and 'viral'
    entries are read, plus the optional 's6'/'s7'/'s8' and 'm6'/'m8' entries.
    The parsed parameters are handed to mirv_worker.run together with config.

    Any exception raised while handling a job (including a malformed job
    dict) is logged, the job is marked "error", the administrator is warned
    and, when a notification address is known, the submitter is notified.
    The worker then carries on with the next job.

    Args:
        id: integer worker number, used in log and status messages.
        q: queue providing a blocking get().
        config: configuration object, passed through to mirv_worker.run.
    """
    print("worker %d started" % (id))
    while True:
        job = q.get()
        if job == SHUTDOWN_FLAG:
            break

        # Defaults so the error handler works even when the job fails before
        # these values have been parsed.
        job_id = None
        notify_mail = None
        jobName = None

        try:
            job_id = job['id']
            update_job_status(job_id, "started on worker %d" % (id))
            print("worker %d computing job %s." % (id, job_id))

            # parse params out of job; notification details first so that a
            # later parse failure can still be reported to the submitter
            print(job)
            notify_mail = job['notify_mail']
            jobName = job['jobName']
            genes = job['genes']
            geneId = job['geneId']
            wobble = (job['wobble'] == 'yes')
            cut = float(job['cut'])
            topRet = job['topRet']
            mirbase_species = job['species']
            bgModel = job['bgModel']
            viral = (job['viral'] == 'True')

            # condense seed models and motif sizes into arrays of ints
            seedModels = [int(job[s]) for s in ['s6', 's7', 's8'] if s in job and job[s]]
            motifSizes = [int(job[m]) for m in ['m6', 'm8'] if m in job and job[m]]

            # run the job
            r = mirv_worker.run(config, job_id, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet, viral)

            # notify on success
            if r:
                print("worker %d finished job %s." % (id, job_id))
                if notify_mail:
                    adminEmailer.notify_complete(notify_mail.split(","), str(job_id), jobName)
            else:
                print("worker %d, job %s failed." % (id, job_id))

        except Exception:
            print("Exception in mirv_worker %d on job %s." % (id, str(job_id)))
            traceback.print_stack()
            traceback.print_exc()
            # Each reporting step is best-effort: a failure in one must not
            # prevent the others or stop the worker.
            try:
                update_job_status(job_id, 'error')
            except Exception:
                traceback.print_stack()
                traceback.print_exc()
            try:
                # format_stack() returns a list of lines; join them so the
                # message contains readable text rather than a list repr.
                adminEmailer.warn(error_msg_template % (id, str(job_id), "".join(traceback.format_stack()), traceback.format_exc(),))
            except Exception:
                traceback.print_stack()
                traceback.print_exc()
            try:
                if notify_mail:
                    # recipients, job_uuid, job_name
                    adminEmailer.notify_error(notify_mail.split(","), str(job_id), jobName)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

    print("worker %d exiting." % (id))