Esempio n. 1
0
def subsample_loss(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one ``subsample_loss.py`` job per input BAM.

    For every path in ``inputs[0]`` a shell script named
    ``sample_chr<N>_loss.sh`` is written under the sample's script path. The
    script runs ``subsample_loss.py`` on that BAM with
    ``<finalbams_path>/CHR<N>_LOSS.bam`` as its second argument, and is handed
    to ``pipelineHelpers.RunTask``. Once every job has been launched the
    collected tasks are passed to ``pipelineHelpers.CheckTaskStatus`` a single
    time, matching the other stage functions in this file.

    Args:
        inputs: sequence whose first element is an iterable of BAM paths.
            The basename up to the first ``_`` must contain a run of digits
            (e.g. ``chr1``); otherwise the ``[1]`` lookup raises IndexError.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: not used by this function.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [subsample loss events] ' + '[' + sample_id + '] '
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        # The project paths do not depend on the input, so fetch them once.
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        for inp in inputs[0]:
            chrevent = os.path.basename(inp).strip().split("_")[0]
            # Splitting on a capturing group keeps the digits at index 1.
            chrom = re.split(r'(\d+)', chrevent)[1]
            loss_final = "/".join(
                [finalbams_path, 'CHR' + str(chrom).upper() + '_LOSS.bam'])

            script_file = '{0}sample_{1}_{2}.sh'.format(
                script_path, 'chr' + str(chrom), "loss")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write(
                    'python {path}/subsample_loss.py {inbam} {fl} \n'.format(
                        path=current_path, inbam=inp, fl=loss_final))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        # Check the tasks once, after all of them have been launched,
        # instead of re-checking the growing list after every launch.
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Sampling Loss Event')
Esempio n. 2
0
def complete_pipeline(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch the final ``mergesort.py`` job.

    A single shell script ``mergesort.sh`` is written under the sample's
    script path. It loads the ``sambamba`` module and runs ``mergesort.py``
    with the configured output file name and the final-BAMs directory, and is
    handed to ``pipelineHelpers.RunTask``; the resulting task is then passed
    to ``pipelineHelpers.CheckTaskStatus``.

    Args:
        inputs: not used by this function.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: not used by this function.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [Final merge] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('high')
        mergedbamname = params.GetOutputFileName()
        # finalbams_path was referenced without ever being assigned here;
        # obtain it the same way the sibling stage functions do.
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        script_file = '{0}mergesort.sh'.format(script_path)
        # The context manager closes the script even if a write fails.
        with open(script_file, 'w') as script:
            script.write('#!/bin/bash\n')
            script.write('#\n')
            script.write('#$ -cwd \n')
            script.write('module load sambamba \n')
            script.write(
                'python {path}/mergesort.py '
                ' {mergedfinal} {finalbamdir}\n'.format(
                    path=current_path, mergedfinal=mergedbamname,
                    finalbamdir=finalbams_path))

        process = pipelineHelpers.RunTask(
            os.path.abspath(script_file), 4, bamgineer_mem,
            sample_id, bamhelp.name)
        task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)

    pipelineHelpers.Logging('INFO', log, log_msg + 'COMPLETE!')
Esempio n. 3
0
def repair_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one ``re-pair.py`` job per input BAM.

    For every path in ``inputs[0]`` a shell script named
    ``re-pair_<chr>_gain.sh`` is written under the sample's script path,
    where ``<chr>`` is the part of the file's basename before the first dot.
    The script loads the ``samtools/1.2`` and ``sambamba`` modules and runs
    ``re-pair.py`` on the BAM. Each script is handed to
    ``pipelineHelpers.RunTask`` and all resulting tasks are passed to
    ``pipelineHelpers.CheckTaskStatus`` once the loop is done.

    Args:
        inputs: sequence whose first element is an iterable of BAM paths.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: not used by this function.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [re-pairing reads] ' + '[' + sample_id + '] '
    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        for inp in inputs[0]:
            chrom = os.path.basename(inp).strip().split(".")[0]

            script_file = '{0}re-pair_{1}_{2}.sh'.format(
                script_path, chrom, "gain")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n\n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('python {path}/re-pair.py {inbam} \n'.format(
                    inbam=inp, path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Re-pairing')
Esempio n. 4
0
def sort_by_name(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one ``sambamba sort -n`` job per chromosome BAM.

    ``outputs[0]``, ``inputs[0]`` and the module-level ``chr_list`` are
    walked in lockstep. For each triple a script ``sortbyname_chr<num>.sh``
    is written under the sample's script path that runs
    ``sambamba sort -n <input> -o <output> -t 4``. Each script is handed to
    ``pipelineHelpers.RunTask`` and all resulting tasks are passed to
    ``pipelineHelpers.CheckTaskStatus`` once the loop is done.

    Args:
        inputs: sequence whose first element is an iterable of input BAMs.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: sequence whose first element is an iterable of output BAMs.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [SortByName] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        for outp, inp, num in zip(outputs[0], inputs[0], chr_list):
            script_file = '{0}sortbyname_chr{1}.sh'.format(script_path, num)
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load sambamba \n')
                script.write('sambamba sort -n {inp} '
                             '-o {outp} -t 4 \n'.format(inp=inp, outp=outp))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    # The closing message previously named the SplitBams stage.
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished SortByName')
Esempio n. 5
0
def mutate_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one mutation job per re-paired gain BAM.

    For every path in ``inputs[0]`` a shell script ``mutate_<chr>_gain.sh``
    is written under the sample's script path, where ``<chr>`` is the part of
    the file's basename before the first dot. The script, in order:

    1. de-duplicates the ``gain_het_snp_<chr>.bed`` file in place,
    2. runs ``mutate.py`` on the BAM with that bed file,
    3. sorts the ``.gain.mutated.het.bam`` file with sambamba,
    4. runs ``bam diff`` between the input and the sorted het BAM,
    5. merges the sorted het BAM with ``diff_only1_<input basename>``,
    6. removes the unsorted het BAM and the ``diff_only1_`` file,
    7. runs ``rename-reads.py`` on the merged BAM.

    Each script is handed to ``pipelineHelpers.RunTask``; once every job has
    been launched the collected tasks are passed to
    ``pipelineHelpers.CheckTaskStatus`` a single time, matching the other
    stage functions in this file.

    Args:
        inputs: sequence whose first element is an iterable of BAM paths
            ending in ``.gain.roi.repaired.sorted.bam``.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: not used by this function.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [implement_cnv] ' + '[' + sample_id + '] '

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')

        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        # Suffix pattern shared by all derived file names below.
        repaired_suffix = '.gain.roi.repaired.sorted.bam$'

        for inp in inputs[0]:
            chrom = os.path.basename(inp).strip().split(".")[0]

            bedfn = "/".join([haplotype_path, 'gain_het_snp_' + chrom + '.bed'])
            diffn = "/".join([tmpbams_path, "diff.bam"])
            nonhet = "/".join(
                [tmpbams_path, 'diff_only1_' + os.path.basename(inp)])
            hetfn = sub(repaired_suffix, '.gain.mutated.het.bam', inp)
            hetfnsorted = sub(
                repaired_suffix, '.gain.mutated.het.sorted.bam', inp)
            mergedsortfn = sub(
                repaired_suffix, '.gain.mutated.merged.sorted.bam', inp)
            mergedrenamedfn = sub(
                repaired_suffix, '.gain.renamed.mutated.merged.sorted.bam',
                inp)

            script_file = '{0}mutate_{1}_{2}.sh'.format(
                script_path, chrom, "gain")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                # This line was written without a newline, which fused it
                # with the next write into '##$ -cwd'.
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('module load bamUtil \n')

                script.write('sort -u {bf} -o {bf}\n\n'.format(bf=bedfn))
                script.write(
                    'python {path}/mutate.py {repairedbam} {bf} '
                    '{happath}\n\n'.format(
                        repairedbam=inp, bf=bedfn, path=current_path,
                        happath=haplotype_path))
                script.write('sambamba sort {het} -o {hetsort}\n\n'.format(
                    het=hetfn, hetsort=hetfnsorted))
                script.write(
                    'bam diff --in1 {repairedbam} --in2 {hetsort} '
                    '--out {dif}\n\n'.format(
                        repairedbam=inp, hetsort=hetfnsorted, dif=diffn))
                script.write(
                    'sambamba merge {merged} {hetonly} '
                    '{nonhetonly}\n\n'.format(
                        merged=mergedsortfn, hetonly=hetfnsorted,
                        nonhetonly=nonhet))
                script.write('rm {het} {nonhetonly}  \n\n'.format(
                    het=hetfn, nonhetonly=nonhet))
                script.write(
                    'python {path}/rename-reads.py {inp2} {outp}\n\n'.format(
                        inp2=mergedsortfn, outp=mergedrenamedfn,
                        path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        # Check the tasks once, after all of them have been launched,
        # instead of re-checking the growing list after every launch.
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Mutating')
def find_roi_bam(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one ROI-extraction job per input/output BAM pair.

    ``inputs[0]`` and ``outputs[0]`` are walked in lockstep. The output's
    basename is split on dots: the first field is used as the chromosome and
    the second as the event name. For each pair a script
    ``find_roi_<chr>_<event>.sh`` is written under the sample's script path
    that:

    1. de-duplicates ``<event>_exons_in_roi_<chr>.bed`` in place,
    2. runs ``bedtools pairtobed`` on the input against that bed file,
       redirecting into the output path,
    3. sorts the result with sambamba into ``<output>.sorted.bam``,
    4. removes the unsorted output.

    Each script is handed to ``pipelineHelpers.RunTask`` and all resulting
    tasks are passed to ``pipelineHelpers.CheckTaskStatus`` once the loop is
    done.

    Args:
        inputs: sequence whose first element is an iterable of input BAMs.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: sequence whose first element is an iterable of output BAM
            paths named ``<chr>.<event>. ... .bam``.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [FindRoiBam] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        # Builtin zip, as in the sibling stage functions.
        for inp, op in zip(inputs[0], outputs[0]):
            opsorted = sub('.bam$', ".sorted.bam", op)
            # Split the basename once; fields 0 and 1 are chromosome and event.
            name_fields = os.path.basename(op).strip().split(".")
            chrom = name_fields[0]
            event = name_fields[1]
            exonsinroibed = "/".join(
                [haplotype_path,
                 event + "_exons_in_roi_" + str(chrom) + '.bed'])

            script_file = '{0}find_roi_{1}_{2}.sh'.format(
                script_path, chrom, event)
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load bedtools \n')
                script.write('module load sambamba \n')

                script.write('sort -u {exonbed} -o {exonbed} \n'.format(
                    exonbed=exonsinroibed))
                script.write('bedtools pairtobed -abam {inp} '
                             '-b {bf} -type either > {outp} \n'.format(
                                 inp=inp, bf=exonsinroibed, outp=op))
                script.write('sambamba sort {outp} -o '
                             '{outpsorted} \n'.format(
                                 outp=op, outpsorted=opsorted))
                script.write('rm {outp} \n'.format(outp=op))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 1, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished FindROI')
Esempio n. 7
0
def split_bams(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and launch one ``samtools view`` job per chromosome.

    After calling the module-level ``initialize()`` and changing the working
    directory to the sample's script path, ``outputs[0]`` and the
    module-level ``chr_list`` are walked in lockstep. For each pair a script
    ``splitbam_chr<num>.sh`` is written that:

    1. runs an ``awk`` command over the exon file from
       ``params.GetExonPath()``, appending each line whose first column
       matches ``chr`` to a file named ``<first column>.bed``,
    2. runs ``samtools view -bh`` on ``inputs[0][0]`` for region
       ``chr<num>``, redirecting into the output path.

    Each script is handed to ``pipelineHelpers.RunTask`` and all resulting
    tasks are passed to ``pipelineHelpers.CheckTaskStatus`` once the loop is
    done.

    Args:
        inputs: nested sequence; ``inputs[0][0]`` is the BAM to split.
        output_sentinel: forwarded to ``pipelineHelpers.CheckTaskStatus``.
        outputs: sequence whose first element is an iterable of output BAMs.
        sample_id: sample identifier used for logging and script lookup.
        prev_sentinel: sentinel of the preceding stage; no job is created
            unless ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [SplitBams] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        exons = params.GetExonPath()
        # The awk command is embedded in every generated script rather than
        # being run here.
        command = "".join(
            ["""awk '($1 ~ "chr"){print $0 >> $1".bed" }' """, exons])
        initialize()

        os.chdir(script_path)
        source_bam = inputs[0][0]
        for outp, num in zip(outputs[0], chr_list):
            script_file = '{0}splitbam_chr{1}.sh'.format(script_path, num)
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools \n')
                script.write(command + '\n')
                script.write('samtools view -bh {bam} chr{num} > '
                             '{out}\n'.format(bam=source_bam,
                                              num=num, out=outp))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished task1')