def complete_pipeline(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit the final merge/sort job for one sample.

    When the previous stage's sentinel check passes, a ``mergesort.sh`` shell
    script is generated under the sample's script directory, submitted through
    ``pipelineHelpers.RunTask`` and then tracked with
    ``pipelineHelpers.CheckTaskStatus``.

    Args:
        inputs: Pipeline inputs for this stage (not read by this function).
        output_sentinel: Sentinel handed to ``CheckTaskStatus`` for this stage.
        outputs: Pipeline outputs for this stage (not read by this function).
        sample_id: Sample identifier; used in log messages and job submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [Final merge] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('high')
        mergedbamname = params.GetOutputFileName()

        # NOTE(review): finalbams_path is not assigned anywhere in this
        # function, so it has to exist as a module-level name -- confirm it is
        # defined before this task runs (sibling tasks obtain it from
        # taskHelpers.GetProjectNamePathRunID()).
        merge_cmd = 'python {path}/mergesort.py  {mergedfinal} {finalbamdir}\n'.format(
            path=current_path,
            mergedfinal=mergedbamname,
            finalbamdir=finalbams_path)

        # The context manager guarantees the script is flushed and closed even
        # if one of the writes fails.
        with open('{0}mergesort.sh'.format(script_path), 'w') as script:
            script.write('#!/bin/bash\n')
            script.write('#\n')
            script.write('#$ -cwd \n')
            script.write('module load sambamba \n')
            script.write(merge_cmd)

        process = pipelineHelpers.RunTask(
            os.path.abspath(script.name), 4, bamgineer_mem,
            sample_id, bamhelp.name)
        task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'COMPLETE!')
def subsample_loss(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one sub-sampling job per loss-event BAM file.

    For every path in ``inputs[0]`` a shell script is generated that runs
    ``subsample_loss.py`` on that BAM and writes to
    ``<finalbams_path>/CHR<N>_LOSS.bam``. Each script is submitted through
    ``pipelineHelpers.RunTask`` and the collected tasks are then handed to
    ``pipelineHelpers.CheckTaskStatus``.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths. The
            basename up to the first ``_`` must contain a run of digits, which
            is taken as the chromosome number.
        output_sentinel: Sentinel handed to ``CheckTaskStatus`` for this stage.
        outputs: Pipeline outputs for this stage (not read by this function).
        sample_id: Sample identifier; used in log messages and job submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.

    Raises:
        IndexError: If a file name prefix contains no digits.
    """
    task_list = []
    log_msg = ' [subsample loss events] ' + '[' + sample_id + '] '

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        # The project paths do not depend on the input file, so look them up
        # once instead of once per iteration (same as mutate_gain).
        _, _, _, _, _, finalbams_path = taskHelpers.GetProjectNamePathRunID()

        for inp in inputs[0]:
            chrevent = os.path.basename(inp).strip().split("_")[0]
            # re.split with a capturing group keeps the digits at index 1.
            # NOTE(review): prefixes without digits (e.g. "chrX") raise
            # IndexError here -- confirm such inputs never reach this task.
            chrom = re.split(r'(\d+)', chrevent)[1]
            loss_final = "/".join(
                [finalbams_path, 'CHR' + str(chrom).upper() + '_LOSS.bam'])
            script_name = '{0}sample_{1}_{2}.sh'.format(
                script_path, 'chr' + str(chrom), "loss")

            # The context manager guarantees the script is flushed and closed
            # even if one of the writes fails.
            with open(script_name, 'w') as script:
                script.write('#!/bin/bash\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write(
                    'python {path}/subsample_loss.py {inbam} {fl} \n'.format(
                        path=current_path, inbam=inp, fl=loss_final))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script.name), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Sampling Loss Event')
def repair_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one read re-pairing job per gain-event BAM file.

    For every path in ``inputs[0]`` a shell script is generated that loads
    samtools and sambamba and runs ``re-pair.py`` on that BAM. Each script is
    submitted through ``pipelineHelpers.RunTask`` and the collected tasks are
    then handed to ``pipelineHelpers.CheckTaskStatus``.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths. The
            basename up to the first ``.`` is used as the chromosome label in
            the script file name.
        output_sentinel: Sentinel handed to ``CheckTaskStatus`` for this stage.
        outputs: Pipeline outputs for this stage (not read by this function).
        sample_id: Sample identifier; used in log messages and job submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [re-pairing reads] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        for inp in inputs[0]:
            chrom = os.path.basename(inp).strip().split(".")[0]
            script_name = '{0}re-pair_{1}_{2}.sh'.format(
                script_path, chrom, "gain")

            # The context manager guarantees the script is flushed and closed
            # even if one of the writes fails.
            with open(script_name, 'w') as script:
                script.write('#!/bin/bash\n\n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('python {path}/re-pair.py {inbam} \n'.format(
                    inbam=inp, path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script.name), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Re-pairing')
def mutate_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one read-mutation job per re-paired gain BAM file.

    For every path in ``inputs[0]`` a shell script is generated that, in order:
    de-duplicates the chromosome's het-SNP BED file, runs ``mutate.py``, sorts
    the mutated het BAM, diffs it against the re-paired BAM, merges the het and
    non-het reads, removes intermediates and finally runs ``rename-reads.py``.
    Each script is submitted through ``pipelineHelpers.RunTask`` and the
    collected tasks are then handed to ``pipelineHelpers.CheckTaskStatus``.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths whose
            names end in ``.gain.roi.repaired.sorted.bam``. The basename up to
            the first ``.`` is used as the chromosome label.
        output_sentinel: Sentinel handed to ``CheckTaskStatus`` for this stage.
        outputs: Pipeline outputs for this stage (not read by this function).
        sample_id: Sample identifier; used in log messages and job submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [implement_cnv] ' + '[' + sample_id + '] '

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        # Suffix of the incoming BAMs; every derived file name swaps it out.
        repaired_suffix = '.gain.roi.repaired.sorted.bam$'

        for inp in inputs[0]:
            chrom = os.path.basename(inp).strip().split(".")[0]
            bedfn = "/".join([haplotype_path, 'gain_het_snp_' + chrom + '.bed'])
            diffn = "/".join([tmpbams_path, "diff.bam"])
            nonhet = "/".join(
                [tmpbams_path, 'diff_only1_' + os.path.basename(inp)])
            hetfn = sub(repaired_suffix, '.gain.mutated.het.bam', inp)
            hetfnsorted = sub(
                repaired_suffix, '.gain.mutated.het.sorted.bam', inp)
            mergedsortfn = sub(
                repaired_suffix, '.gain.mutated.merged.sorted.bam', inp)
            mergedrenamedfn = sub(
                repaired_suffix, '.gain.renamed.mutated.merged.sorted.bam', inp)

            script_name = '{0}mutate_{1}_{2}.sh'.format(
                script_path, chrom, "gain")

            # The context manager guarantees the script is flushed and closed
            # even if one of the writes fails.
            with open(script_name, 'w') as script:
                script.write('#!/bin/bash\n')
                # The bare comment line needs its own newline; without it the
                # next write produced "##$ -cwd" and the "#$" directive was
                # no longer at the start of a line.
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('module load bamUtil \n')
                script.write('sort -u {bf} -o {bf}\n\n'.format(bf=bedfn))
                script.write(
                    'python {path}/mutate.py {repairedbam} {bf} {happath}\n\n'.format(
                        repairedbam=inp, bf=bedfn, path=current_path,
                        happath=haplotype_path))
                script.write('sambamba sort {het} -o {hetsort}\n\n'.format(
                    het=hetfn, hetsort=hetfnsorted))
                script.write(
                    'bam diff --in1 {repairedbam} --in2 {hetsort} --out {dif}\n\n'.format(
                        repairedbam=inp, hetsort=hetfnsorted, dif=diffn))
                script.write(
                    'sambamba merge {merged} {hetonly} {nonhetonly}\n\n'.format(
                        merged=mergedsortfn, hetonly=hetfnsorted,
                        nonhetonly=nonhet))
                script.write('rm {het} {nonhetonly} \n\n'.format(
                    het=hetfn, nonhetonly=nonhet))
                script.write(
                    'python {path}/rename-reads.py {inp2} {outp}\n\n'.format(
                        inp2=mergedsortfn, outp=mergedrenamedfn,
                        path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script.name), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Mutating')
import os
import sys
from re import sub
from ruffus import *
from helpers import depthOfCoverageTasks, depthOfCoverageHelpers, pipelineHelpers
from helpers import runIDHelpers as rid
from helpers import parameters as params
import subprocess

# Value returned by params.GetProgramPath(), resolved once at import time
# (presumably the program's install directory -- confirm).
current_path = params.GetProgramPath()
# Log handle requested from pipelineHelpers under the name 'DepthOfCoverage'.
log = pipelineHelpers.GetLogFile('DepthOfCoverage')


def runCommand(cmd):
    """Run *cmd* through the shell and wait for it to finish.

    The command's stdout is captured via a pipe; stderr is not redirected.
    An ``OSError`` raised while launching or communicating with the process
    is caught and reported with ``print`` instead of propagating.

    Args:
        cmd: Command passed to ``subprocess.Popen`` with ``shell=True``.
    """
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, shell=True)
        # Blocks until the process exits. Because stderr was not piped, the
        # second element returned by communicate() is None.
        stdout, stderr = process.communicate()
    except OSError as e:
        print("Execution failed:", e)