def submit_job(self, job):
    """Give *job* a fresh id, persist it and hand it to the work queue.

    The job dict is mutated: its ``'id'`` key is set to a new ``uuid1``.
    ``job['created']`` must already be present, since it is formatted for
    the log line.

    Returns:
        The generated UUID object.

    Raises:
        Full: re-raised after the job status has been set to "error" when
            the non-blocking ``q.put`` finds the queue full.
    """
    job_id = uuid.uuid1()
    job['id'] = job_id
    created_at = job['created'].strftime('%Y.%m.%d %H:%M:%S')
    print("Job %s created %s" % (job['id'], created_at))
    create_job_in_db(job)

    # Enqueue without blocking; a full queue is reported and propagated.
    try:
        q.put(job, block=False)
    except Full:
        traceback.print_stack()
        traceback.print_exc()
        update_job_status(job['id'], "error")
        raise

    # Mark the job as queued, recording the queue length when the queue
    # implementation can report it.
    try:
        pending = q.qsize()
        if pending >= QUEUE_WARN_SIZE:
            adminEmailer.warn(
                "Queue is getting too long. There are currently %d items in the queue." % pending
            )
        update_job_status(job['id'], "queued", "queue length %d" % pending)
    except NotImplementedError:
        print("q.qsize not supported")
        update_job_status(job['id'], "queued")

    return job_id
def run(config, job_uuid, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet=10, viral=False):
    """Run weeder and the miRvestigator HMM for one job and store the results.

    Steps: map the job's gene identifiers to entrez IDs, look their
    sequences up in the species' ``p3utrSeqs_*.csv`` file, write them to a
    temporary FASTA file, run weeder on it, keep the motifs of the requested
    sizes, score them with miRvestigator and store motifs and scores.

    Args:
        config: Config object; ``config.get('General', ...)`` is read for
            ``data_dir``, ``tmp_dir`` and ``pssm_images_dir``.
        job_uuid: Job id used for status updates, image file names and
            result storage.
        genes: Not read; immediately replaced by the result of
            ``map_genes_to_entrez_ids``.
        geneId: Passed to ``map_genes_to_entrez_ids`` and logged.
        seedModels: Forwarded to miRvestigator as ``seedModel``.
        wobble: Forwarded to miRvestigator as ``wobble``.
        cut: Converted to ``float`` and forwarded as ``wobbleCut``.
        motifSizes: Container of motif lengths to keep; only 6 and 8 are
            recognised.
        jobName: Unused in this function.
        mirbase_species: Species id passed to ``get_species_by_mirbase_id``,
            ``map_genes_to_entrez_ids`` and miRvestigator.
        bgModel: ``'3p'`` selects the species' weeder model name as is; any
            other value selects that name without a trailing ``'3P'``.
        topRet: Unused in this function.
        viral: Forwarded to miRvestigator.

    Returns:
        True once results are stored and the status is "done"; False when
        none of the genes has a sequence (status is set to "error").
    """
    species = get_species_by_mirbase_id(mirbase_species)

    # Remove only a literal '3P' suffix. str.rstrip('3P') strips any run of
    # the characters '3' and 'P', which would also eat the end of a model
    # name that itself ends in one of those characters.
    weeder_model = species['weeder']
    if bgModel != '3p' and weeder_model.endswith('3P'):
        weeder_model = weeder_model[:-2]
    bgModel = weeder_model

    sequence_file = os.path.join(config.get('General', 'data_dir'), "p3utrSeqs_" + species['ucsc_name'] + ".csv")
    cut = float(cut)
    # NOTE(review): two concurrent jobs drawing the same number would share
    # temp file names.
    curRunNum = randint(0, 1000000)

    # translate gene identifiers to entrez IDs
    print("translating gene identifiers from %s to entrez IDs" % (geneId))
    genes = map_genes_to_entrez_ids(job_uuid, geneId, mirbase_species)
    print("genes = " + str(genes))

    # 1. Read in sequences: first CSV column is the (upper-cased) id, the
    # second the sequence. Blank lines are skipped.
    seqs = {}
    with open(sequence_file, 'r') as seq_file:
        for line in seq_file:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            seqs[fields[0].upper()] = fields[1]

    # 2. Get sequences for each target
    miRSeqs = {}
    for gene in genes:
        if gene in seqs:
            miRSeqs[gene] = seqs[gene]

    # if there are no matching sequences, bail out w/ a reasonable error message.
    if not miRSeqs:
        print("no matching sequences found for genes in job " + str(job_uuid))
        update_job_status(job_uuid, "error", "No sequences found for the genes entered.")
        return False

    # record whether a sequence was found for each gene
    # previously stored when job was created (create_job_in_db)
    set_genes_annotated(job_uuid, miRSeqs)

    # 3. Make a FASTA file
    fasta_dir = os.path.join(config.get('General', 'tmp_dir'), 'fasta')
    if not os.path.exists(fasta_dir):
        os.makedirs(fasta_dir)
    fasta_fname = os.path.join(fasta_dir, 'tmp' + str(curRunNum) + '.fasta')

    try:
        with open(fasta_fname, 'w') as fastaFile:
            for gene in miRSeqs:
                fastaFile.write('>' + str(gene) + '\n' + str(miRSeqs[gene]) + '\n')

        # 4. Run weeder
        print('Running weeder!')
        update_job_status(job_uuid, "running weeder")
        weederPSSMs1 = weeder(config, seqFile=fasta_fname, percTargets=50, revComp=False, bgModel=bgModel)

        # 4a. Take only selected size motifs (the motif name's length is
        # used as the motif size) and plot each one that is kept.
        selectedPSSMs = []
        for pssm1 in weederPSSMs1:
            name = pssm1.getName()
            if len(name) in (6, 8) and len(name) in motifSizes:
                png_path = os.path.join(config.get('General', 'pssm_images_dir'), str(job_uuid) + '_' + name + '.png')
                selectedPSSMs.append(deepcopy(pssm1))
                plotPssm(pssm1, png_path)
            print("pssm name = " + name)
        weederPSSMs1 = selectedPSSMs

        # 5. Run miRvestigator HMM
        update_job_status(job_uuid, "computing miRvestigator HMM")
        mV = miRvestigator(config, weederPSSMs1, seqs.values(), seedModel=seedModels, minor=True, p5=True, p3=True, wobble=wobble, wobbleCut=cut, textOut=False, species=mirbase_species, viral=viral)
    finally:
        # 6. Clean-up after yerself: runs on failure too, so temp files do
        # not pile up; a file that was never created is simply skipped.
        for suffix in ('', '.wee', '.mix', '.html'):
            tmp_path = fasta_fname + suffix
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # 7. write output to database
    update_job_status(job_uuid, "compiling results")
    for pssm in weederPSSMs1:
        motif_id = store_motif(job_uuid, pssm)
        scores = mV.getScoreList(pssm.getName())
        store_mirvestigator_scores(motif_id, scores)

    update_job_status(job_uuid, "done")
    return True
def start_worker(id, q):
    """Worker loop: take jobs off *q* and run each via ``mirv_worker.run``.

    The loop ends when the item taken from the queue equals
    ``SHUTDOWN_FLAG``. For any other item the job status is updated, the
    parameters are unpacked from the job dict and the analysis is run. An
    exception raised by the run is reported (job status, admin e-mail and,
    if requested, user e-mail) and the loop carries on with the next job.

    Args:
        id: Number of this worker; used in log and status messages.
        q: Queue-like object whose ``get()`` yields job dicts or
            ``SHUTDOWN_FLAG``.
    """
    print("worker %d started" % id)
    while True:
        job = q.get()
        if job == SHUTDOWN_FLAG:
            break

        job_id = job['id']
        update_job_status(job_id, "started on worker %d" % id)
        print("worker %d computing job %s." % (id, job_id))

        # Unpack the job parameters. This happens outside the try block
        # below, so a missing key propagates out of the loop.
        print(job)
        gene_list = job['genes']
        gene_id_type = job['geneId']
        use_wobble = job['wobble'] == 'yes'
        cutoff = float(job['cut'])
        job_name = job['jobName']
        top_ret = job['topRet']
        species_id = job['species']
        recipients = job['notify_mail']
        bg_model = job['bgModel']
        is_viral = job['viral'] == 'True'

        # Seed models and motif sizes arrive as optional per-size entries;
        # collect the ones that are present and truthy as ints.
        seed_models = [int(job[key]) for key in ('s6', 's7', 's8') if job.get(key)]
        motif_sizes = [int(job[key]) for key in ('m6', 'm8') if job.get(key)]

        try:
            succeeded = mirv_worker.run(job_id, gene_list, gene_id_type, seed_models, use_wobble, cutoff, motif_sizes, job_name, species_id, bg_model, top_ret, is_viral)

            if not succeeded:
                print("worker %d, job %s failed." % (id, job_id))
            else:
                print("worker %d finished job %s." % (id, job_id))
                if recipients:
                    adminEmailer.notify_complete(recipients.split(","), str(job_id), job_name)
        except Exception:
            print("Exception in mirv_worker %d on job %s." % (id, str(job_id)))
            traceback.print_stack()
            traceback.print_exc()

            # Each reporting step is guarded on its own so that one failing
            # does not prevent the others from being attempted.
            try:
                update_job_status(job_id, 'error')
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

            try:
                report = error_msg_template % (id, str(job_id), traceback.format_stack(), traceback.format_exc(),)
                adminEmailer.warn(report)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

            try:
                if recipients:
                    # arguments: recipients, job_uuid, job_name
                    adminEmailer.notify_error(recipients.split(","), str(job_id), job_name)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

    print("worker %d exiting." % id)
def run(job_uuid, genes, geneId, seedModels, wobble, cut, motifSizes, jobName, mirbase_species, bgModel, topRet=10, viral=False):
    """Run weeder and the miRvestigator HMM for one job and store the results.

    Steps: map the job's gene identifiers to entrez IDs, look their
    sequences up in the species' ``p3utrSeqs_*.csv`` file, write them to a
    temporary FASTA file, run weeder on it, keep the motifs of the requested
    sizes, score them with miRvestigator and store motifs and scores.
    Directories are read from the module-level ``conf`` object
    (``data_dir``, ``tmp_dir``, ``pssm_images_dir``).

    Args:
        job_uuid: Job id used for status updates, image file names and
            result storage.
        genes: Not read; immediately replaced by the result of
            ``map_genes_to_entrez_ids``.
        geneId: Passed to ``map_genes_to_entrez_ids`` and logged.
        seedModels: Forwarded to miRvestigator as ``seedModel``.
        wobble: Forwarded to miRvestigator as ``wobble``.
        cut: Converted to ``float`` and forwarded as ``wobbleCut``.
        motifSizes: Container of motif lengths to keep; only 6 and 8 are
            recognised.
        jobName: Unused in this function.
        mirbase_species: Species id passed to ``get_species_by_mirbase_id``,
            ``map_genes_to_entrez_ids`` and miRvestigator.
        bgModel: ``'3p'`` selects the species' weeder model name as is; any
            other value selects that name without a trailing ``'3P'``.
        topRet: Unused in this function.
        viral: Forwarded to miRvestigator.

    Returns:
        True once results are stored and the status is "done"; False when
        none of the genes has a sequence (status is set to "error").
    """
    species = get_species_by_mirbase_id(mirbase_species)

    # Remove only a literal '3P' suffix. str.rstrip('3P') strips any run of
    # the characters '3' and 'P', which would also eat the end of a model
    # name that itself ends in one of those characters.
    weeder_model = species['weeder']
    if bgModel != '3p' and weeder_model.endswith('3P'):
        weeder_model = weeder_model[:-2]
    bgModel = weeder_model

    sequence_file = conf.data_dir + "/p3utrSeqs_" + species['ucsc_name'] + ".csv"
    cut = float(cut)
    # NOTE(review): two concurrent jobs drawing the same number would share
    # temp file names.
    curRunNum = randint(0, 1000000)

    # translate gene identifiers to entrez IDs
    print("translating gene identifiers from %s to entrez IDs" % (geneId))
    genes = map_genes_to_entrez_ids(job_uuid, geneId, mirbase_species)
    print("genes = " + str(genes))

    # 1. Read in sequences: first CSV column is the (upper-cased) id, the
    # second the sequence. Blank lines are skipped.
    seqs = {}
    with open(sequence_file, 'r') as seq_file:
        for line in seq_file:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            seqs[fields[0].upper()] = fields[1]

    # 2. Get sequences for each target
    miRSeqs = {}
    for gene in genes:
        if gene in seqs:
            miRSeqs[gene] = seqs[gene]

    # if there are no matching sequences, bail out w/ a reasonable error message.
    if not miRSeqs:
        print("no matching sequences found for genes in job " + str(job_uuid))
        update_job_status(job_uuid, "error", "No sequences found for the genes entered.")
        return False

    # record whether a sequence was found for each gene
    # previously stored when job was created (create_job_in_db)
    set_genes_annotated(job_uuid, miRSeqs)

    # 3. Make a FASTA file
    fasta_dir = conf.tmp_dir + '/fasta'
    if not os.path.exists(fasta_dir):
        os.makedirs(fasta_dir)
    fasta_fname = fasta_dir + '/tmp' + str(curRunNum) + '.fasta'

    try:
        with open(fasta_fname, 'w') as fastaFile:
            for gene in miRSeqs:
                fastaFile.write('>' + str(gene) + '\n' + str(miRSeqs[gene]) + '\n')

        # 4. Run weeder
        print('Running weeder!')
        update_job_status(job_uuid, "running weeder")
        weederPSSMs1 = weeder(seqFile=fasta_fname, percTargets=50, revComp=False, bgModel=bgModel)

        # 4a. Take only selected size motifs (the motif name's length is
        # used as the motif size) and plot each one that is kept.
        selectedPSSMs = []
        for pssm1 in weederPSSMs1:
            name = pssm1.getName()
            if len(name) in (6, 8) and len(name) in motifSizes:
                selectedPSSMs.append(deepcopy(pssm1))
                plotPssm(pssm1, conf.pssm_images_dir + '/' + str(job_uuid) + '_' + name + '.png')
            print("pssm name = " + name)
        weederPSSMs1 = selectedPSSMs

        # 5. Run miRvestigator HMM
        update_job_status(job_uuid, "computing miRvestigator HMM")
        mV = miRvestigator(weederPSSMs1, seqs.values(), seedModel=seedModels, minor=True, p5=True, p3=True, wobble=wobble, wobbleCut=cut, textOut=False, species=mirbase_species, viral=viral)
    finally:
        # 6. Clean-up after yerself: runs on failure too, so temp files do
        # not pile up; a file that was never created is simply skipped.
        for suffix in ('', '.wee', '.mix', '.html'):
            tmp_path = fasta_fname + suffix
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # 7. write output to database
    update_job_status(job_uuid, "compiling results")
    for pssm in weederPSSMs1:
        motif_id = store_motif(job_uuid, pssm)
        scores = mV.getScoreList(pssm.getName())
        store_mirvestigator_scores(motif_id, scores)

    update_job_status(job_uuid, "done")
    return True
def start_worker(id, q, config):
    """Worker loop: take jobs off *q* and run each via ``mirv_worker.run``.

    The loop ends when the item taken from the queue equals
    ``SHUTDOWN_FLAG``. For any other item the job status is updated, the
    parameters are unpacked from the job dict and the analysis is run. An
    exception raised by the run is reported (job status, admin e-mail and,
    if requested, user e-mail) and the loop carries on with the next job.

    Args:
        id: Number of this worker; used in log and status messages.
        q: Queue-like object whose ``get()`` yields job dicts or
            ``SHUTDOWN_FLAG``.
        config: Passed through unchanged as the first argument of
            ``mirv_worker.run``.
    """
    print("worker %d started" % id)
    while True:
        job = q.get()
        if job == SHUTDOWN_FLAG:
            break

        job_id = job['id']
        update_job_status(job_id, "started on worker %d" % id)
        print("worker %d computing job %s." % (id, job_id))

        # Unpack the job parameters. This happens outside the try block
        # below, so a missing key propagates out of the loop.
        print(job)
        gene_list = job['genes']
        gene_id_type = job['geneId']
        use_wobble = job['wobble'] == 'yes'
        cutoff = float(job['cut'])
        job_name = job['jobName']
        top_ret = job['topRet']
        species_id = job['species']
        recipients = job['notify_mail']
        bg_model = job['bgModel']
        is_viral = job['viral'] == 'True'

        # Seed models and motif sizes arrive as optional per-size entries;
        # collect the ones that are present and truthy as ints.
        seed_models = [int(job[key]) for key in ('s6', 's7', 's8') if job.get(key)]
        motif_sizes = [int(job[key]) for key in ('m6', 'm8') if job.get(key)]

        try:
            succeeded = mirv_worker.run(config, job_id, gene_list, gene_id_type, seed_models, use_wobble, cutoff, motif_sizes, job_name, species_id, bg_model, top_ret, is_viral)

            if not succeeded:
                print("worker %d, job %s failed." % (id, job_id))
            else:
                print("worker %d finished job %s." % (id, job_id))
                if recipients:
                    adminEmailer.notify_complete(recipients.split(","), str(job_id), job_name)
        except Exception:
            print("Exception in mirv_worker %d on job %s." % (id, str(job_id)))
            traceback.print_stack()
            traceback.print_exc()

            # Each reporting step is guarded on its own so that one failing
            # does not prevent the others from being attempted.
            try:
                update_job_status(job_id, 'error')
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

            try:
                report = error_msg_template % (id, str(job_id), traceback.format_stack(), traceback.format_exc(),)
                adminEmailer.warn(report)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

            try:
                if recipients:
                    # arguments: recipients, job_uuid, job_name
                    adminEmailer.notify_error(recipients.split(","), str(job_id), job_name)
            except Exception:
                traceback.print_stack()
                traceback.print_exc()

    print("worker %d exiting." % id)