Example #1
def process_amplicon_job(job_queue, sequence_source, dg_calc):
    remaining = job_queue.remaining(job_type=(JOB_ID_PROCESS_TAQMAN_AMPLICON,
                                              JOB_ID_PROCESS_LOCATION_AMPLICON,
                                              JOB_ID_PROCESS_SNP_AMPLICON,
                                              JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT))
    for job in remaining:
        if job.type == JOB_ID_PROCESS_TAQMAN_AMPLICON:
            process_taqman_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_LOCATION_AMPLICON:
            process_location_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_SNP_AMPLICON:
            process_snp_job(job, job_queue, sequence_source, dg_calc)
        elif job.type == JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT:
            process_transcript_job(job, job_queue, sequence_source, dg_calc)
    
    Session.close()
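
The if/elif chain above maps each job type to exactly one handler, so the same dispatch can be written as a lookup table. The sketch below is an illustration of that alternative, not code from the original module; it reuses the handler names and job-type constants shown above, and the try/finally is an assumption about how Session.close() should be guaranteed if a handler raises.

AMPLICON_JOB_HANDLERS = {
    JOB_ID_PROCESS_TAQMAN_AMPLICON: process_taqman_job,
    JOB_ID_PROCESS_LOCATION_AMPLICON: process_location_job,
    JOB_ID_PROCESS_SNP_AMPLICON: process_snp_job,
    JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT: process_transcript_job,
}

def process_amplicon_job(job_queue, sequence_source, dg_calc):
    try:
        # dispatch each remaining job to the handler registered for its type
        for job in job_queue.remaining(job_type=tuple(AMPLICON_JOB_HANDLERS)):
            handler = AMPLICON_JOB_HANDLERS.get(job.type)
            if handler is not None:
                handler(job, job_queue, sequence_source, dg_calc)
    finally:
        Session.close()
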
Example #2
def process_reprocess_job(job_queue, config):
    logger = logging.getLogger(LOGGER_NAME)

    # mark finished first
    #in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_REPROCESS)
    in_progress = job_queue.in_progress(job_type=JOB_ID_REPROCESS_QLTESTER)
    for job in in_progress:
        job_queue.finish_tree(job, None)
        if job_queue.is_job_done(job):
            logger.info("Job finished [job %s]" % job.id)
            # We might want to add a flag to the reprocess group/algorithm to prevent multiple starts
            # Below is an example of what to do...
            #args = job_queue.get_job_input_params(job)
            #assay = Session.query(SequenceGroup).get(args.sequence_group_id)
            #if assay:
            #    assay.analyzed = True
            #    Session.commit()

    # now process remaining jobs in the queue
    remaining = job_queue.remaining(job_type=(JOB_ID_REPROCESS_QLTESTER, JOB_ID_REPROCESS_LOAD_QTOOLS))
    for job in remaining:
        if job.type == JOB_ID_REPROCESS_QLTESTER:

            analysis_group, reprocessor = retreive_and_validate_inputs(job, job_queue, logger)

            ## Try to launch connection
            try:
                import paramiko
                import time

                qltester_host = config['qtools.qltester.host']
                qltester_pw   = config['qtools.qltester.pw']
                qltester_user = config['qtools.qltester.user']

                qltester_cmd = 'python ./run_reprocessor.py ' + str(analysis_group.id) + ' ' + reprocessor.code
                                       
                client = paramiko.SSHClient()
                client.load_system_host_keys()
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect(qltester_host, 22, qltester_user, qltester_pw)
                chan = client.get_transport().open_session()
                chan.exec_command(qltester_cmd)

                # wait for the remote reprocessor to exit
                while not chan.exit_status_ready():
                    time.sleep(10)
                
                reprocess_result = chan.recv_exit_status()
                client.close()

            except Exception:
                # SSH connection or remote command failed: abort job.
                logger.exception("Error from Reprocessor worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to qltester."))
                continue
            
            if reprocess_result == 0:
                ## if successful, update db
                logger.info("Reprocess process job finished [job %s]" % job.id)
                Session.commit()
    
                ## add step 2 to the job queue
                message = JSONMessage(analysis_group_id=analysis_group.id, reprocess_config_id=reprocessor.id)
                job_queue.add(JOB_ID_REPROCESS_LOAD_QTOOLS, message, parent_job=job)           

                # mark progress
                job_queue.progress(job)
            
            elif reprocess_result == 1:
                # reprocessor is busy with another job; try again later
                logger.info("Reprocessor is busy, process job [job %s] will be attempted again later" % job.id)
            else:
                logger.info("Reprocess process job failed [job %s] qltester exit code %d" % (job.id, reprocess_result) )
                job_queue.abort(job, JSONErrorMessage("Reprocessor failed on qltester: Non-zero result status (%d)" % reprocess_result)) 
        
        elif job.type == JOB_ID_REPROCESS_LOAD_QTOOLS:
            
            analysis_group, reprocessor = retreive_and_validate_inputs(job, job_queue, logger)

            try:
                result = update_reprocess_analysis_group_data(analysis_group, reprocessor, config, logger)

            except Exception:
                logger.exception("Error from Reprocess worker:")
                job_queue.abort(job, JSONErrorMessage("Unable add to qtools"))
                continue
            
            # avoid a race where the job completes -- clear the child job first
            if result == 0:
                job_queue.finish(job, None)
            else:
                logger.error("Error from Reprocess worker [job %s]: Non-zero result status (%d)" % (job.id, result))
                job_queue.abort(job, JSONErrorMessage("Unable to add to qtools: Non-zero result status (%d)" % result))
                continue

 
    # this is key; otherwise, the SQL connection pool will be sucked up.
    Session.close()
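
The SSH block in Example #2 interleaves connection handling with job bookkeeping. Below is a minimal sketch of the same paramiko pattern pulled out into a standalone helper; run_remote_command is a hypothetical name, and the try/finally around the client is an assumption about how the connection should be released.

import time
import paramiko

def run_remote_command(host, user, password, command, poll_seconds=10):
    # open the SSH connection the same way the worker above does
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(host, 22, user, password)
    try:
        chan = client.get_transport().open_session()
        chan.exec_command(command)
        # poll until the remote process exits, then return its status code
        while not chan.exit_status_ready():
            time.sleep(poll_seconds)
        return chan.recv_exit_status()
    finally:
        client.close()
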
Example #3
def process_assay_job(job_queue, tm_calc, dg_calc):
    logger = logging.getLogger(LOGGER_NAME)

    # mark finished first
    in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_ASSAY)
    for job in in_progress:
        job_queue.finish_tree(job, None)
        if job_queue.is_job_done(job):
            logger.info("Job finished [job %s]" % job.id)
            args = job_queue.get_job_input_params(job)
            assay = Session.query(SequenceGroup).get(args.sequence_group_id)
            if assay:
                assay.analyzed = True
                Session.commit()
    
    remaining = job_queue.remaining(job_type=JOB_ID_PROCESS_ASSAY)
    for job in remaining:
        struct = JSONMessage.unserialize(job.input_message)
        sg_id = struct.sequence_group_id
        
        sg = Session.query(SequenceGroup).get(sg_id)
        # TODO: I think the parent function should have cleared out the amplicons,
        # but I need to make that choice
        if not sg:
            logger.error("Unknown sequence group id: %s [job %s]" % (sg_id, job.id))
            job_queue.abort(job, JSONErrorMessage("Unknown sequence group id: %s" % sg_id))
            continue
        
        if sg.kit_type == SequenceGroup.TYPE_DESIGNED:
            probe_ids = [p.id for p in sg.probes]
            # TODO: transaction?
            if sg.type == SequenceGroup.ASSAY_TYPE_GEX:
                job_type = JOB_ID_PROCESS_GEX_TAQMAN_TRANSCRIPT
            else:
                job_type = JOB_ID_PROCESS_TAQMAN_AMPLICON

            for fp in sg.forward_primers:
                for rp in sg.reverse_primers:
                    job_queue.add(job_type,
                                  ProcessPrimerAmpliconMessage(sequence_group_id=sg.id,
                                                               forward_primer_id=fp.id,
                                                               reverse_primer_id=rp.id,
                                                               probe_ids=probe_ids),
                                  parent_job=job)
            
            # TM, DG of sequence components right here
            for fp in sg.forward_primers:
                fp.tm = tm_calc.tm_primer(fp.sequence.sequence)
                fp.dg = dg_calc.delta_g(fp.sequence.sequence)
            for rp in sg.reverse_primers:
                rp.tm = tm_calc.tm_primer(rp.sequence.sequence)
                rp.dg = dg_calc.delta_g(rp.sequence.sequence)
            for p in sg.probes:
                if p.quencher and p.quencher.upper() == 'MGB':
                    p.tm = tm_calc.tm_probe(p.sequence.sequence, mgb=True)
                    p.dg = dg_calc.delta_g(p.sequence.sequence)
                else:
                    p.tm = tm_calc.tm_probe(p.sequence.sequence, mgb=False)
                    p.dg = dg_calc.delta_g(p.sequence.sequence)
            Session.commit()


        elif sg.kit_type == SequenceGroup.TYPE_LOCATION:
            job_queue.add(JOB_ID_PROCESS_LOCATION_AMPLICON,
                          ProcessLocationAmpliconMessage(sequence_group_id=sg.id),
                          parent_job=job)
        
        elif sg.kit_type == SequenceGroup.TYPE_SNP:
            job_queue.add(JOB_ID_PROCESS_SNP_RSID,
                          ProcessSNPRSIDMessage(sequence_group_id=sg.id),
                          parent_job=job)
        
        # TODO: need to be in transaction?
        job_queue.progress(job)
    
    # this is the key; otherwise, the SQL connection pool will be sucked up.
    Session.close()
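
The workers in Examples #2 and #3 (and the SNP worker below) share the same shape: sweep the in-progress jobs and finish their trees, then handle whatever remains, and always release the database session so the connection pool is not exhausted. The sketch below abstracts that skeleton; run_worker, handle_job, and on_done are illustrative names, not functions from the original module.

def run_worker(job_queue, job_types, handle_job, on_done=None):
    try:
        # mark finished jobs first
        for job in job_queue.in_progress(job_type=job_types):
            job_queue.finish_tree(job, None)
            if on_done is not None and job_queue.is_job_done(job):
                on_done(job)
        # now process the remaining jobs in the queue
        for job in job_queue.remaining(job_type=job_types):
            handle_job(job)
    finally:
        # release the session so the SQL connection pool is not exhausted
        Session.close()
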
Example #4
def process_snp_job(job_queue, snp_source, snp_table):
    logger = logging.getLogger(LOGGER_NAME)

    in_progress = job_queue.in_progress(job_type=JOB_ID_PROCESS_SNP_RSID)
    for job in in_progress:
        job_queue.finish_tree(job, None)
    
    remaining = job_queue.remaining(job_type=(JOB_ID_PROCESS_SNPS, JOB_ID_PROCESS_SNP_RSID, JOB_ID_PROCESS_GEX_SNPS))
    for job in remaining:
        if job.type == JOB_ID_PROCESS_SNPS:
            snps               = []
            struct             = JSONMessage.unserialize(job.input_message)
            cached_sequence_id = struct.cached_sequence_id
            cached_seq         = Session.query(AmpliconSequenceCache).get(cached_sequence_id)
            if not cached_seq:
                logger.error("SNP job: Unknown amplicon sequence id: %s [job %s]" % (cached_sequence_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown amplicon sequence id: %s" % cached_sequence_id))
                continue
            
            try:
                snps = snp_source.snps_in_range(cached_seq.chromosome,
                                                cached_seq.start_pos-cached_seq.seq_padding_pos5,
                                                cached_seq.end_pos+cached_seq.seq_padding_pos3)
            except Exception:
                # DB timeout: abort job.
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue
            
            if snps:
                db_snps = snp_objects_from_extdb(snps, snp_table)
                if not cached_seq.snps:
                    cached_seq.snps = []
                for snp in db_snps:
                    cached_seq.snps.append(snp)
            
            logger.info("SNP process job finished [job %s]" % job.id)
            Session.commit()
            job_queue.finish(job, None)
        
        elif job.type == JOB_ID_PROCESS_GEX_SNPS:
            snps = []
            struct = JSONMessage.unserialize(job.input_message)
            transcript_id = struct.transcript_id
            transcript = Session.query(Transcript).get(transcript_id)
            if not transcript:
                logger.error("GEX SNP job: Unknown transcript id: %s [job %s]" % (transcript_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown transcript id %s" % transcript_id))
                continue
            try:
                logger.debug("Transcript exon regions: %s" % transcript.exon_regions)
                snps = snp_source.snps_in_chrom_ranges(transcript.chromosome,
                                                       transcript.exon_bounds)
            except Exception:
                # DB timeout
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue

            if snps:
                # transcript?
                db_snps = snp_objects_from_extdb(snps, snp_table)
                if not transcript.snps:
                    transcript.snps = []
                for snp in db_snps:
                    transcript.snps.append(snp)

            logger.info("GEX SNP process job finished [job %s]" % job.id)
            Session.commit()
            job_queue.finish(job, None)
        
        elif job.type == JOB_ID_PROCESS_SNP_RSID:
            struct = JSONMessage.unserialize(job.input_message)
            sequence_group_id = struct.sequence_group_id
            sequence_group    = Session.query(SequenceGroup).get(sequence_group_id)
            if not sequence_group:
                logger.error("Process RSID unknown sequence id: %s [job %s]" % (sequence_group_id, job.id))
                job_queue.abort(job, JSONErrorMessage("Unknown sequence id."))
                continue
            
            snp_rsid = sequence_group.snp_rsid
            if not snp_rsid:
                logger.error("Process RSID empty RSID: %s [job %s]" % (snp_rsid, job.id))
                job_queue.abort(job, JSONErrorMessage("Empty SNP rsid."))
                continue
            
            try:
                snps = snp_source.snps_by_rsid(snp_rsid)
                if not snps:
                    logger.info("Process RSID unknown RSID: %s [job %s]" % (snp_rsid, job.id))
                    job_queue.abort(job, JSONErrorMessage("Unknown SNP rsid."))
                    continue
            except Exception:
                logger.exception("Error from SNP worker:")
                job_queue.abort(job, JSONErrorMessage("Unable to connect to SNP database."))
                continue
            
            locations = []
            for snp in snps:
                chromosome = snp['chrom'][3:]
                if snp['refUCSC'] == '-': # deletion:
                    start = snp['chromStart']
                else:
                    start = snp['chromStart']+1
                end = snp['chromEnd']
                message = ProcessSNPAmpliconMessage(sequence_group_id, chromosome, start, end)
                job_queue.add(JOB_ID_PROCESS_SNP_AMPLICON, message, parent_job=job)
            
            # TODO: need to be in transaction?
            job_queue.progress(job)
    
    Session.close()
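
The start/end arithmetic in the RSID branch converts UCSC's 0-based, half-open chromStart/chromEnd pair into 1-based, inclusive coordinates, keeping chromStart unchanged when refUCSC is '-' (no reference base at the site). A small hypothetical helper making that conversion explicit, under the same assumptions about the snp dict keys:

def ucsc_snp_span(snp):
    chromosome = snp['chrom'][3:]       # strip the leading 'chr'
    if snp['refUCSC'] == '-':
        start = snp['chromStart']       # zero-length reference span
    else:
        start = snp['chromStart'] + 1   # 0-based half-open -> 1-based inclusive
    end = snp['chromEnd']
    return chromosome, start, end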