def main(args):
    """Configure the run from parsed command-line arguments and start it.

    Stores the user options in ``params``, creates the ``cnv_dir`` directory
    under the configured results path, passes that directory together with
    the CNV bed input to ``createEventBedFiles``, registers the external tool
    paths and, when ``args.phase`` is set, hands over to ``run_pipeline``.

    Args:
        args: Parsed options. The attributes read here are ``configfile``,
            ``cancerType``, ``outBamFile``, ``splitbams``, ``phase``,
            ``ctDNA``, ``singleXY`` and ``cnvBed``.
    """
    # The reader object is fetched before the config file is loaded (same
    # order as before); it is only queried after InitConfigReader has run.
    config_reader = params.GetConfigReader()
    params.InitConfigReader(args.configfile)

    params.SetCancerType(args.cancerType)
    params.SetOutputFileName(args.outBamFile)
    params.SetSplitBamsPath(args.splitbams)
    params.SetPhase(args.phase)
    params.SetctDNA(args.ctDNA)
    params.SetXY(args.singleXY)
    params.SetCNV(args.cnvBed)

    results_path = config_reader.get('RESULTS', 'results_path')

    # Directory that receives the event bed files.
    cnvdir = "/".join([results_path, "cnv_dir"])
    if not os.path.exists(cnvdir):
        os.makedirs(cnvdir)
    createEventBedFiles(cnvdir, params.GetCNV())
    params.SetCNVDir(cnvdir)

    # set software paths
    params.SetSoftwarePath(
        bamhelp.GetJavaPath(),
        bamhelp.GetBeaglePath(),
        bamhelp.GetSamtoolsPath(),
        bamhelp.GetBedtoolsPath(),
        bamhelp.GetVCFtoolsPath(),
        bamhelp.GetSambambaPath(),
    )

    if not args.phase:
        # Nothing is run without the phase option; tell the user why.
        print('Please provide custom phasing algorithm')
        return

    run_pipeline(results_path)
def split_bam_task_list():
    """Yield the job parameters for the BAM-splitting tasks.

    Builds the input, output, sentinel and sample-id lists with
    ``taskHelpers.CreateFileList``, combines them through
    ``taskHelpers.CreateTaskList`` and yields the resulting jobs one by one.
    As a side effect the ``splitbams`` directory under the results path is
    registered in ``params`` and created via ``utils.createDirectory``.
    """
    # Only the sentinel and results locations are needed here; the remaining
    # members of the 6-tuple are unpacked to keep the arity check.
    (sentinel_dir, results_dir, _haplotype_dir, _cancer_dir,
     _tmpbams_dir, _finalbams_dir) = taskHelpers.GetProjectNamePathRunID()

    previous_sentinels = [
        taskHelpers.CreateFileList('None', -1, sentinel_dir),
    ]

    split_dir = "/".join([results_dir, "splitbams"])
    params.SetSplitBamsPath(split_dir)
    utils.createDirectory(split_dir)
    split_prefix = split_dir + "/"

    sentinel_files = taskHelpers.CreateFileList(
        '{0}_split.sentinel', 1, sentinel_dir)
    input_files = [taskHelpers.CreateFileList('bam', 1, split_prefix)]
    output_files = [taskHelpers.CreateFileList('chr{1}.bam', 22, split_prefix)]
    sample_names = taskHelpers.CreateFileList('{0}', 1, '')

    tasks = taskHelpers.CreateTaskList(
        input_files, sentinel_files, output_files, sample_names,
        previous_sentinels)
    for task in tasks:
        yield task
def main(args):
    """Store the command-line options in ``params`` and launch the pipeline.

    Reads ``outBamFile``, ``configfile``, ``cnvAmpFile``, ``cnvDelFile``,
    ``cancerType``, ``splitbams`` and ``phase`` from ``args``. The pipeline is
    started through ``run_pipeline`` only when ``args.phase`` is truthy;
    otherwise the function returns after the configuration step.
    """
    out_bam_path = args.outBamFile

    reader = params.GetConfigReader()
    params.InitConfigReader(args.configfile)

    params.SetGainCNV(args.cnvAmpFile)
    params.SetLossCNV(args.cnvDelFile)
    params.SetCancerType(args.cancerType)
    params.SetOutputFileName(out_bam_path)
    params.SetSplitBamsPath(args.splitbams)

    results_dir = reader.get('RESULTS', 'results_path')

    # set software paths (evaluated left to right, in the order
    # SetSoftwarePath expects them)
    tool_paths = (
        bamhelp.GetJavaPath(),
        bamhelp.GetBeaglePath(),
        bamhelp.GetSamtoolsPath(),
        bamhelp.GetBedtoolsPath(),
        bamhelp.GetVCFtoolsPath(),
        bamhelp.GetSambambaPath(),
    )
    params.SetSoftwarePath(*tool_paths)

    if not args.phase:
        return
    run_pipeline(results_dir)
verify_msg = infile_msg + '\n' + config_msg pipelineHelpers.LogInputCheck(verify_msg) if not (infile_pass and config_pass): raise Exception('Invalid input.') pipeline_msg = '\n---------------------------------\n' \ 'Running pipeline\n' \ '---------------------------------' print pipeline_msg log = pipelineHelpers.GetLogFile('MAIN') pipelineHelpers.Logging('INFO', log, pipeline_msg) num_procs = line_count * 4 if( args.phase): if(not args.splitbams): pipeline_run([pipeline.find_roi_bam], multiprocess=num_procs, verbose=1) if(args.cnvDelFile): pipeline_run([pipeline.complete_pipeline]) else: pipeline_run([pipeline.complete_pipeline_gain]) else: params.SetSplitBamsPath(args.splitbams) ##to do else: print('user must provide phase VCFs') #user provides phased VCF t1 = time.time()
def run_pipeline(results_path):
    """Run the per-chromosome CNV steps in a worker pool and merge the BAMs.

    Steps, in order:
      1. Resolve the project paths and logging handles via ``handle`` and
         publish them as module globals.
      2. Build the chromosome/event work list from the files found in the
         CNV directory registered in ``params``.
      3. Ensure ``<results_path>/phasedvcfdir`` exists and run the
         initialisation helpers.
      4. In a 12-process pool run ``split_bam_by_chr`` (only when no split-BAM
         path is configured), then ``find_roi_bam`` and ``implement_cnv``.
      5. Merge the final BAMs into the configured output file, remove the
         temporary BAM directory and shut logging down.

    Args:
        results_path: Root directory for the run's results.
    """
    print(results_path)
    global haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, \
        log_path, logfile, terminating, logger, logQueue, res_path

    res_path = results_path
    (haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path,
     log_path, logfile) = handle.GetProjectPaths(results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    # chr1..chr22 plus the sex chromosomes.
    chr_list = ['chr' + str(x) for x in range(1, 23)]
    chr_list.extend(['chrX', 'chrY'])

    t0 = time.time()
    outbamfn = params.GetOutputFileName()

    cnv_list = glob.glob("/".join([params.GetCNVDir(), '*.*']))
    chromosome_event = create_chr_event_list(cnv_list, chr_list)

    logger.debug('pipeline started!')

    phase_path = '/'.join([results_path, 'phasedvcfdir'])
    if not os.path.exists(phase_path):
        os.makedirs(phase_path)

    initialize0(phase_path, cancer_dir_path)
    for cnv_path in cnv_list:
        initialize_pipeline(phase_path, haplotype_path, cnv_path)

    pool1 = multiprocessing.Pool(
        processes=12,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:
        if not params.GetSplitBamsPath():
            split_path = "/".join([res_path, 'splitbams'])
            if not os.path.exists(split_path):
                os.makedirs(split_path)
            # Register the directory even when it already exists on disk;
            # otherwise the split path would stay unset for the workers.
            params.SetSplitBamsPath(split_path)
            # A finite (very large) timeout is passed to get(); presumably so
            # the wait stays interruptible with Ctrl-C -- TODO confirm.
            pool1.map_async(split_bam_by_chr, chromosome_event).get(9999999)

        pool1.map_async(find_roi_bam, chromosome_event).get(9999999)
        pool1.map_async(implement_cnv, chromosome_event).get(9999999)
        pool1.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        pool1.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        pool1.terminate()
    finally:
        pool1.join()

    # NOTE(review): merging and cleanup also run after a failed or cancelled
    # pool run, as before -- confirm this best-effort behaviour is intended.
    time.sleep(.1)
    mergeSortBamFiles(outbamfn, finalbams_path)
    t1 = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in ' +
                 str(round((t1 - t0) / 60.0, 1)) + ' minutes ***** ')
    logging.shutdown()