def process_amplicon_job(job_queue, sequence_source, dg_calc):
    remaining = job_queue.remaining(job_type=(JOB_ID_PROCESS_TAQMAN_AMPLICON,
                                              JOB_ID_PROCESS_LOCATION_AMPLICON,
                                              JOB_ID_PROCESS_SNP_AMPLICON,
                                              JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT))
    for job in remaining:
        if job.type == JOB_ID_PROCESS_TAQMAN_AMPLICON:
            process_taqman_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_LOCATION_AMPLICON:
            process_location_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_SNP_AMPLICON:
            process_snp_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT:
            process_transcript_job(job, job_queue, sequence_source, dg_calc)

    Session.close()
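
# A minimal sketch (not part of the original module) of how a polling worker
# might drive the dispatch entry point above. The function name, the
# poll_secs parameter, and the bare while-loop are assumptions; only
# process_amplicon_job, Session, and LOGGER_NAME come from this module.
def _example_amplicon_worker_loop(job_queue, sequence_source, dg_calc, poll_secs=30):
    import logging
    import time
    logger = logging.getLogger(LOGGER_NAME)
    while True:
        try:
            # One pass drains whatever amplicon jobs are currently queued.
            process_amplicon_job(job_queue, sequence_source, dg_calc)
        except Exception:
            # A failed pass should not kill the worker; log, release the
            # session, and retry on the next poll.
            logger.exception("Amplicon worker pass failed:")
            Session.rollback()
            Session.close()
        time.sleep(poll_secs)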
def process_reprocess_job(job_queue, config):
    logger = logging.getLogger(LOGGER_NAME)

    # Mark finished jobs first.
    #in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_REPROCESS)
    in_progress = job_queue.in_progress(job_type=JOB_ID_REPROCESS_QLTESTER)
    for job in in_progress:
        job_queue.finish_tree(job, None)
        if job_queue.is_job_done(job):
            logger.info("Job finished [job %s]" % job.id)
            # We might want to add a flag to the reprocess group/algorithm to
            # prevent multiple starts.  Below is an example of what to do...
            #args = job_queue.get_job_input_params(job)
            #assay = Session.query(SequenceGroup).get(args.sequence_group_id)
            #if assay:
            #    assay.analyzed = True
            #    Session.commit()

    # Now process the remaining jobs in the queue.
    remaining = job_queue.remaining(job_type=(JOB_ID_REPROCESS_QLTESTER, JOB_ID_REPROCESS_LOAD_QTOOLS))
    for job in remaining:
        if job.type == JOB_ID_REPROCESS_QLTESTER:
            analysis_group, reprocessor = retreive_and_validate_inputs(job, job_queue, logger)

            # Try to open an SSH connection to the qltester host and launch
            # the remote reprocessor.
            try:
                import paramiko
                import time
                qltester_host = config['qtools.qltester.host']
                qltester_pw = config['qtools.qltester.pw']
                qltester_user = config['qtools.qltester.user']
                qltester_cmd = 'python ./run_reprocessor.py ' + str(analysis_group.id) + ' ' + reprocessor.code

                client = paramiko.SSHClient()
                client.load_system_host_keys()
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect(qltester_host, 22, qltester_user, qltester_pw)
                chan = client.get_transport().open_session()
                chan.exec_command(qltester_cmd)
                # Poll until the remote command exits.
                while not chan.exit_status_ready():
                    time.sleep(10)
                reprocess_result = chan.recv_exit_status()
                client.close()
            except Exception:
                # Connection/launch failure: abort job.
                logger.exception("Error from Reprocessor worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to qltester."))
                continue

            if reprocess_result == 0:
                # If successful, update the DB and add step 2 to the job queue.
                logger.info("Reprocess process job finished [job %s]" % job.id)
                Session.commit()
                message = JSONMessage(analysis_group_id=analysis_group.id, reprocess_config_id=reprocessor.id)
                job_queue.add(JOB_ID_REPROCESS_LOAD_QTOOLS, message, parent_job=job)
                # Mark progress.
                job_queue.progress(job)
            elif reprocess_result == 1:
                # qltester is currently processing another job; try again later.
                logger.info("Reprocessor is busy, process job [job %s] will be attempted again later" % job.id)
            else:
                logger.info("Reprocess process job failed [job %s] qltester exit code %d" % (job.id, reprocess_result))
                job_queue.abort(job, JSONErrorMessage("Reprocessor failed on qltester: Non-zero result status (%d)" % reprocess_result))

        elif job.type == JOB_ID_REPROCESS_LOAD_QTOOLS:
            analysis_group, reprocessor = retreive_and_validate_inputs(job, job_queue, logger)
            try:
                result = update_reprocess_analysis_group_data(analysis_group, reprocessor, config, logger)
            except Exception:
                logger.exception("Error from Reprocess worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to add to qtools"))
                continue

            # Avoid the condition where the job completed -- clear the child job first.
            if result == 0:
                job_queue.finish(job, None)
            else:
                logger.error("Error from Reprocess worker [job %s]: Non-zero result status (%d)" % (job.id, result))
                job_queue.abort(job, JSONErrorMessage("Unable to add to qtools: Non-zero result status (%d)" % result))
                continue

    # This is key; otherwise, the SQL connection pool will be sucked up.
    Session.close()
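
# A self-contained sketch of the SSH launch-and-poll pattern used above,
# factored into a reusable helper. The helper name and its poll/timeout
# parameters are hypothetical (not in this module); the paramiko calls are
# the same ones process_reprocess_job uses.
def _example_run_remote_command(host, user, password, cmd, poll_secs=10, timeout_secs=3600):
    import time
    import paramiko
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(host, 22, user, password)
    try:
        chan = client.get_transport().open_session()
        chan.exec_command(cmd)
        # Poll for completion instead of blocking indefinitely, so a hung
        # remote process cannot stall the worker forever.
        waited = 0
        while not chan.exit_status_ready():
            if waited >= timeout_secs:
                raise RuntimeError("Remote command timed out: %s" % cmd)
            time.sleep(poll_secs)
            waited += poll_secs
        return chan.recv_exit_status()
    finally:
        client.close()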
def process_assay_job(job_queue, tm_calc, dg_calc):
    logger = logging.getLogger(LOGGER_NAME)

    # Mark finished jobs first.
    in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_ASSAY)
    for job in in_progress:
        job_queue.finish_tree(job, None)
        if job_queue.is_job_done(job):
            logger.info("Job finished [job %s]" % job.id)
            args = job_queue.get_job_input_params(job)
            assay = Session.query(SequenceGroup).get(args.sequence_group_id)
            if assay:
                assay.analyzed = True
                Session.commit()

    remaining = job_queue.remaining(job_type=JOB_ID_PROCESS_ASSAY)
    for job in remaining:
        struct = JSONMessage.unserialize(job.input_message)
        sg_id = struct.sequence_group_id
        sg = Session.query(SequenceGroup).get(sg_id)
        # TODO: I think the parent function should have cleared out the
        # amplicons, but I need to make that choice.
        if not sg:
            logger.error("Unknown sequence group id: %s [job %s]" % (sg_id, job.id))
            job_queue.abort(job, JSONErrorMessage("Unknown sequence group id: %s" % sg_id))
            continue

        if sg.kit_type == SequenceGroup.TYPE_DESIGNED:
            probe_ids = [p.id for p in sg.probes]
            # TODO: transaction?
            if sg.type == SequenceGroup.ASSAY_TYPE_GEX:
                job_type = JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT
            else:
                job_type = JOB_ID_PROCESS_TAQMAN_AMPLICON
            # Fan out one amplicon job per forward/reverse primer pair.
            for fp in sg.forward_primers:
                for rp in sg.reverse_primers:
                    job_queue.add(job_type,
                                  ProcessPrimerAmpliconMessage(sequence_group_id=sg.id,
                                                               forward_primer_id=fp.id,
                                                               reverse_primer_id=rp.id,
                                                               probe_ids=probe_ids),
                                  parent_job=job)

            # Compute Tm and delta G of the sequence components right here.
            for fp in sg.forward_primers:
                fp.tm = tm_calc.tm_primer(fp.sequence.sequence)
                fp.dg = dg_calc.delta_g(fp.sequence.sequence)
            for rp in sg.reverse_primers:
                rp.tm = tm_calc.tm_primer(rp.sequence.sequence)
                rp.dg = dg_calc.delta_g(rp.sequence.sequence)
            for p in sg.probes:
                # MGB probes melt differently, so pass the flag through.
                p.tm = tm_calc.tm_probe(p.sequence.sequence,
                                        mgb=bool(p.quencher and p.quencher.upper() == 'MGB'))
                p.dg = dg_calc.delta_g(p.sequence.sequence)
            Session.commit()
        elif sg.kit_type == SequenceGroup.TYPE_LOCATION:
            job_queue.add(JOB_ID_PROCESS_LOCATION_AMPLICON,
                          ProcessLocationAmpliconMessage(sequence_group_id=sg.id),
                          parent_job=job)
        elif sg.kit_type == SequenceGroup.TYPE_SNP:
            job_queue.add(JOB_ID_PROCESS_SNP_RSID,
                          ProcessSNPRSIDMessage(sequence_group_id=sg.id),
                          parent_job=job)

        # TODO: need to be in a transaction?
        job_queue.progress(job)

    # This is the key; otherwise, the SQL connection pool will be sucked up.
    # OH WHY DID I PICK THIS TIME TO WORRY ABOUT THREADING
    Session.close()
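
# A minimal stub of the duck-typed calculator interface process_assay_job
# relies on: tm_primer(seq), tm_probe(seq, mgb=...), and delta_g(seq). The
# class name and all return values below are illustrative placeholders; the
# real tm_calc/dg_calc implementations are supplied by the caller and are
# not defined in this module.
class _ExampleCalcStub(object):
    def tm_primer(self, seq):
        # Real implementations use nearest-neighbor thermodynamics; this
        # stub just returns a placeholder melting temperature (Celsius).
        return 60.0

    def tm_probe(self, seq, mgb=False):
        # The mgb flag lets the calculator apply a different parameter set
        # for minor-groove-binder probes.
        return 68.0 if mgb else 65.0

    def delta_g(self, seq):
        # Placeholder secondary-structure free energy (kcal/mol).
        return -1.5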
def process_snp_job(job_queue, snp_source, snp_table):
    logger = logging.getLogger(LOGGER_NAME)

    in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_SNP_RSID)
    for job in in_progress:
        job_queue.finish_tree(job, None)

    remaining = job_queue.remaining(job_type=(JOB_ID_PROCESS_SNPS, JOB_ID_PROCESS_SNP_RSID, JOB_ID_PROCESS_GEX_SNPS))
    for job in remaining:
        if job.type == JOB_ID_PROCESS_SNPS:
            snps = []
            struct = JSONMessage.unserialize(job.input_message)
            cached_sequence_id = struct.cached_sequence_id
            cached_seq = Session.query(AmpliconSequenceCache).get(cached_sequence_id)
            if not cached_seq:
                logger.error("SNP job: Unknown amplicon sequence id: %s [job %s]" % (cached_sequence_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown amplicon sequence id: %s" % cached_sequence_id))
                continue

            try:
                # Include the 5'/3' padding around the amplicon in the search range.
                snps = snp_source.snps_in_range(cached_seq.chromosome,
                                                cached_seq.start_pos - cached_seq.seq_padding_pos5,
                                                cached_seq.end_pos + cached_seq.seq_padding_pos3)
            except Exception:
                # DB timeout: abort job.
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue

            if snps:
                db_snps = snp_objects_from_extdb(snps, snp_table)
                if not cached_seq.snps:
                    cached_seq.snps = []
                for snp in db_snps:
                    cached_seq.snps.append(snp)
            logger.info("SNP process job finished [job %s]" % job.id)
            Session.commit()
            job_queue.finish(job, None)

        elif job.type == JOB_ID_PROCESS_GEX_SNPS:
            snps = []
            struct = JSONMessage.unserialize(job.input_message)
            transcript_id = struct.transcript_id
            transcript = Session.query(Transcript).get(transcript_id)
            if not transcript:
                logger.error("GEX SNP job: Unknown transcript id: %s [job %s]" % (transcript_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown transcript id %s" % transcript_id))
                continue

            try:
                logger.debug("Transcript exon regions: %s" % transcript.exon_regions)
                snps = snp_source.snps_in_chrom_ranges(transcript.chromosome, transcript.exon_bounds)
            except Exception:
                # DB timeout: abort job.
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue

            if snps:
                # transcript?
                db_snps = snp_objects_from_extdb(snps, snp_table)
                if not transcript.snps:
                    transcript.snps = []
                for snp in db_snps:
                    transcript.snps.append(snp)
            logger.info("GEX SNP process job finished [job %s]" % job.id)
            Session.commit()
            job_queue.finish(job, None)

        elif job.type == JOB_ID_PROCESS_SNP_RSID:
            struct = JSONMessage.unserialize(job.input_message)
            sequence_group_id = struct.sequence_group_id
            sequence_group = Session.query(SequenceGroup).get(sequence_group_id)
            if not sequence_group:
                logger.error("Process RSID unknown sequence id: %s [job %s]" % (sequence_group_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown sequence id."))
                continue

            snp_rsid = sequence_group.snp_rsid
            if not snp_rsid:
                logger.error("Process RSID empty RSID: %s [job %s]" % (snp_rsid, job.id))
                job_queue.abort(job, JSONErrorMessage("Empty SNP rsid."))
                continue

            try:
                snps = snp_source.snps_by_rsid(snp_rsid)
                if not snps:
                    logger.info("Process RSID unknown RSID: %s [job %s]" % (snp_rsid, job.id))
                    job_queue.abort(job, JSONErrorMessage("Unknown SNP rsid."))
                    continue
            except Exception:
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue

            for snp in snps:
                # Strip the leading 'chr' from the UCSC chromosome name.
                chromosome = snp['chrom'][3:]
                if snp['refUCSC'] == '-':
                    # deletion: keep chromStart as-is
                    start = snp['chromStart']
                else:
                    # convert UCSC's 0-based chromStart to a 1-based position
                    start = snp['chromStart'] + 1
                end = snp['chromEnd']
                message = ProcessSNPAmpliconMessage(sequence_group_id, chromosome, start, end)
                job_queue.add(JOB_ID_PROCESS_SNP_AMPLICON, message, parent_job=job)

            # TODO: need to be in a transaction?
            job_queue.progress(job)

    Session.close()
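
# A small, self-contained illustration of the UCSC coordinate handling in
# the rsID branch above. The helper name is hypothetical; the conversion
# rule (UCSC chromStart is 0-based, so point variants shift by +1, while
# '-' reference alleles keep chromStart unchanged) mirrors the inline
# logic in process_snp_job.
def _example_ucsc_snp_span(snp_row):
    chromosome = snp_row['chrom'][3:]  # e.g. 'chr1' -> '1'
    if snp_row['refUCSC'] == '-':
        start = snp_row['chromStart']
    else:
        start = snp_row['chromStart'] + 1
    return chromosome, start, snp_row['chromEnd']

# For example, an illustrative row {'chrom': 'chr1', 'chromStart': 100,
# 'chromEnd': 101, 'refUCSC': 'G'} maps to the 1-based span ('1', 101, 101).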