def complete_pipeline(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Final pipeline stage: write a merge/sort shell script, submit it, and
    wait for completion.

    Args:
        inputs: per-task input file lists (unused here; kept so the signature
            matches the task-runner's uniform job tuple).
        output_sentinel: sentinel file path marking this stage as done.
        outputs: expected output file list (unused here; see `inputs`).
        sample_id: sample identifier, used in log messages and script paths.
        prev_sentinel: sentinel of the previous stage; the merge runs only if
            pipelineHelpers.CheckSentinel accepts it.
    """
    task_list = []
    log_msg = ' [Final merge] ' + '[' + sample_id + '] '
    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        # (removed unused local `python = sys.executable`)
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('high')
        mergedbamname = params.GetOutputFileName()
        script_name = '{0}mergesort.sh'.format(script_path)
        # `with` guarantees the script is flushed and closed before submission,
        # even if a write raises (the original handle leaked on error).
        with open(script_name, 'w') as script:
            script.write('#!/bin/bash\n')
            script.write('#\n')
            script.write('#$ -cwd \n')
            script.write('module load sambamba \n')
            script.write('python {path}/mergesort.py '
                         ' {mergedfinal} {finalbamdir}\n'.format(
                             path=current_path,
                             mergedfinal=mergedbamname,
                             finalbamdir=finalbams_path))
        process = pipelineHelpers.RunTask(
            os.path.abspath(script_name), 4, bamgineer_mem,
            sample_id, bamhelp.name)
        task_list.append(process)
        pipelineHelpers.CheckTaskStatus(task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'COMPLETE!')
def run_pipeline(results_path):
    """Drive the pipeline for one run: set up project paths and logging,
    dispatch the per-chromosome ROI and CNV steps to a 4-worker pool, then
    merge/sort the final BAMs and remove the temporary directory."""
    global haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, \
        log_path, logfile, terminating, logger, logQueue
    (haplotype_path, cancer_dir_path, tmpbams_path,
     finalbams_path, log_path, logfile) = handle.GetProjectPaths(results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    started_at = time.time()
    outbamfn = params.GetOutputFileName()
    chromosome_event = create_chr_event_list()
    chromosomes_bamfiles = create_chr_bam_list()
    logger.debug('pipeline started!')
    initialize(results_path, haplotype_path, cancer_dir_path)

    worker_pool = multiprocessing.Pool(
        processes=4,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:
        # map_async + a very long get() timeout keeps the main process
        # interruptible by Ctrl-C while the workers run.
        worker_pool.map_async(find_roi_bam, chromosome_event).get(9999999)
        worker_pool.map_async(implement_cnv, chromosome_event).get(9999999)
        worker_pool.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        worker_pool.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        worker_pool.terminate()
    finally:
        worker_pool.join()
    time.sleep(.1)
    mergeSortBamFiles(outbamfn, finalbams_path)
    finished_at = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in '
                 + str(round((finished_at - started_at) / 60.0, 1))
                 + ' minutes ***** ')
    logging.shutdown()
def complete_pipeline_gain_task_list():
    """Generate job-parameter tuples for the final sort/merge stage.

    Yields one task tuple per job, assembled by taskHelpers.CreateTaskList
    from the stage's inputs, sentinels, outputs, and sample ids.
    """
    (sentinel_path, results_path, haplotype_path, cancer_dir_path,
     tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()
    # The gain-subsample sentinel gates this stage.
    prev_sentinels = [taskHelpers.CreateFileList(
        '{0}_subsample_gain.sentinel', 1, sentinel_path)]
    sentinels = taskHelpers.CreateFileList(
        '{0}_sortmerge.sentinel', 1, sentinel_path)
    inputs = [taskHelpers.CreateFileList(
        '{0}_{1}_{2}.bam', 88, finalbams_path, "FINAL")]
    outputs = [taskHelpers.CreateFileList(
        params.GetOutputFileName(), 1, finalbams_path)]
    sample_ids = taskHelpers.CreateFileList('{0}', 1, '')
    for job in taskHelpers.CreateTaskList(inputs, sentinels, outputs,
                                          sample_ids, prev_sentinels):
        yield job
def run_pipeline(results_path):
    """Whole-genome pipeline variant: phase VCFs per CNV file, split the input
    BAM by chromosome, run ROI extraction and CNV implementation in a
    12-worker pool, then merge/sort the final BAMs and clean up.

    NOTE(review): this redefines `run_pipeline`; if the earlier 4-worker
    definition lives in the same module, this one shadows it — confirm only
    one variant is meant to be active.
    """
    print(results_path)
    global haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, \
        log_path, logfile, terminating, logger, logQueue, res_path
    res_path = results_path
    haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, log_path, \
        logfile = handle.GetProjectPaths(results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    chr_list = ['chr' + str(x) for x in range(1, 23)]
    chr_list.extend(['chrX', 'chrY'])
    t0 = time.time()
    outbamfn = params.GetOutputFileName()
    cnv_list = glob.glob("/".join([params.GetCNVDir(), '*.*']))
    chromosome_event = create_chr_event_list(cnv_list, chr_list)
    logger.debug('pipeline started!')

    phase_path = '/'.join([results_path, 'phasedvcfdir'])
    # Reuse phase_path instead of re-joining the same components (the original
    # rebuilt the identical path expression inside the exists() check).
    if not os.path.exists(phase_path):
        os.makedirs(phase_path)
    initialize0(phase_path, cancer_dir_path)
    for cnv_path in cnv_list:
        initialize_pipeline(phase_path, haplotype_path, cnv_path)

    pool1 = multiprocessing.Pool(
        processes=12,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:
        if not params.GetSplitBamsPath():
            # Build the split-bams directory path once (was joined 3 times).
            splitbams_dir = "/".join([res_path, 'splitbams'])
            if not os.path.exists(splitbams_dir):
                os.makedirs(splitbams_dir)
            params.SetSplitBamsPath(splitbams_dir)
        # Blocking get() with a huge timeout keeps Ctrl-C deliverable to the
        # main process while workers run; dead resultN bindings removed.
        pool1.map_async(split_bam_by_chr, chromosome_event).get(9999999)
        pool1.map_async(find_roi_bam, chromosome_event).get(9999999)
        pool1.map_async(implement_cnv, chromosome_event).get(9999999)
        pool1.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        pool1.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        pool1.terminate()
    finally:
        pool1.join()
    time.sleep(.1)
    mergeSortBamFiles(outbamfn, finalbams_path)
    t1 = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in '
                 + str(round((t1 - t0) / 60.0, 1)) + ' minutes ***** ')
    logging.shutdown()