def wiggle(self):
    """
    Build the jobs that generate coverage tracks (bedGraph + bigWig) for every sample.

    For each sample the source alignment is
    alignment/<sample>/<sample>.merged.mdup.bam.

    * If the 'tophat' / 'strandInfo' configuration value is not 'fr-unstranded',
      two extra jobs first split that BAM into <...>.forward.bam and
      <...>.reverse.bam, and one track job is created per strand, each reading
      its own strand-specific BAM.
    * Otherwise a single track job is created from the merged BAM.

    Track files are written as tracks/<sample>/<sample>[.forward|.reverse].bedGraph
    and tracks/bigWig/<sample>[.forward|.reverse].bw.

    Returns the list of all jobs created, in creation order.
    """
    jobs = []

    for sample in self.samples:
        bam_file_prefix = os.path.join("alignment", sample.name, sample.name + ".merged.mdup.")
        input_bam = bam_file_prefix + "bam"
        bed_graph_prefix = os.path.join("tracks", sample.name, sample.name)
        big_wig_prefix = os.path.join("tracks", "bigWig", sample.name)
        mkdir_command = "mkdir -p " + os.path.join("tracks", sample.name) + " " + os.path.join("tracks", "bigWig")

        if config.param('tophat', 'strandInfo') != 'fr-unstranded':
            input_bam_f1 = bam_file_prefix + "tmp1.forward.bam"
            input_bam_f2 = bam_file_prefix + "tmp2.forward.bam"
            input_bam_r1 = bam_file_prefix + "tmp1.reverse.bam"
            input_bam_r2 = bam_file_prefix + "tmp2.reverse.bam"
            output_bam_f = bam_file_prefix + "forward.bam"
            output_bam_r = bam_file_prefix + "reverse.bam"

            # SAM flag filters: -F 256 drops secondary alignments; each -f value
            # selects one read-pair orientation. Two partial BAMs are extracted per
            # strand, merged, then the temporary partial BAMs are deleted.
            bam_f_job = concat_jobs([
                samtools.view(input_bam, input_bam_f1, "-bh -F 256 -f 81"),
                samtools.view(input_bam, input_bam_f2, "-bh -F 256 -f 161"),
                picard.merge_sam_files([input_bam_f1, input_bam_f2], output_bam_f),
                Job(command="rm " + input_bam_f1 + " " + input_bam_f2)
            ], name="wiggle." + sample.name + ".forward_strandspec")

            bam_r_job = concat_jobs([
                Job(command=mkdir_command),
                samtools.view(input_bam, input_bam_r1, "-bh -F 256 -f 97"),
                samtools.view(input_bam, input_bam_r2, "-bh -F 256 -f 145"),
                picard.merge_sam_files([input_bam_r1, input_bam_r2], output_bam_r),
                Job(command="rm " + input_bam_r1 + " " + input_bam_r2)
            ], name="wiggle." + sample.name + ".reverse_strandspec")

            jobs.extend([bam_f_job, bam_r_job])

            # Each strand track must be computed from its own strand-specific BAM;
            # using the merged BAM for both would yield two identical tracks.
            tracks = [
                (output_bam_f, bed_graph_prefix + ".forward.bedGraph", big_wig_prefix + ".forward.bw"),
                (output_bam_r, bed_graph_prefix + ".reverse.bedGraph", big_wig_prefix + ".reverse.bw"),
            ]
        else:
            tracks = [(input_bam, bed_graph_prefix + ".bedGraph", big_wig_prefix + ".bw")]

        for track_bam, bed_graph_output, big_wig_output in tracks:
            # Job name is the bedGraph basename without its extension. The dot is
            # escaped and the pattern anchored so that only the literal trailing
            # ".bedGraph" is removed, never a look-alike inside the sample name.
            track_name = re.sub(r"\.bedGraph$", "", os.path.basename(bed_graph_output))
            job = concat_jobs([
                Job(command=mkdir_command),
                bedtools.graph(track_bam, bed_graph_output, big_wig_output)
            ], name="wiggle." + track_name)
            jobs.append(job)

    return jobs
def picard_merge_sam_files(self):
    """
    Create one Picard merge job for every sample that has more than one readset.

    The per-readset files alignment/<sample>/<readset>.sorted.bam are merged
    into alignment/<sample>/<sample>.sorted.bam, and the job is named
    "picard_merge_sam_files.<sample>".

    Returns the list of merge jobs (empty if no sample has several readsets).
    """
    merge_jobs = []

    for sample in self.samples:
        # Skip samples with one readset only, since symlink has been created at align step
        if len(sample.readsets) <= 1:
            continue

        sample_directory = os.path.join("alignment", sample.name)

        readset_bams = []
        for readset in sample.readsets:
            readset_bams.append(os.path.join(sample_directory, readset.name + ".sorted.bam"))

        sample_bam = os.path.join(sample_directory, sample.name + ".sorted.bam")

        merge_job = picard.merge_sam_files(readset_bams, sample_bam)
        merge_job.name = "picard_merge_sam_files." + sample.name
        merge_jobs.append(merge_job)

    return merge_jobs
def merge_realigned(self):
    """
    Create, for every sample, the job that merges its per-sequence realigned BAMs.

    The number of realign jobs is read from the 'indel_realigner' /
    'nbRealignJobs' configuration value. When it is greater than 1, the files
    alignment/<sample>/realign/<sequence name>.bam for the first
    (nbRealignJobs - 1) entries of self.sequence_dictionary, plus
    alignment/<sample>/realign/others.bam, are merged into
    alignment/<sample>/<sample>.realigned.qsorted.bam.

    Returns the list of merge jobs (empty when nbRealignJobs is 1).
    """
    merge_jobs = []
    nb_realign_jobs = config.param('indel_realigner', 'nbRealignJobs', type='posint')

    for sample in self.samples:
        # if nb_realign_jobs == 1, symlink has been created in indel_realigner and merging is not necessary
        if nb_realign_jobs <= 1:
            continue

        sample_directory = os.path.join("alignment", sample.name)
        realign_directory = os.path.join(sample_directory, "realign")
        merged_bam = os.path.join(sample_directory, sample.name + ".realigned.qsorted.bam")

        # One BAM per leading sequence, never more than the dictionary holds
        named_sequences = self.sequence_dictionary[0:min(nb_realign_jobs - 1, len(self.sequence_dictionary))]

        bams_to_merge = []
        for sequence in named_sequences:
            bams_to_merge.append(os.path.join(realign_directory, sequence['name'] + ".bam"))
        # "others.bam" is always appended as the final input
        bams_to_merge.append(os.path.join(realign_directory, "others.bam"))

        merge_job = picard.merge_sam_files(bams_to_merge, merged_bam)
        merge_job.name = "merge_realigned." + sample.name
        merge_jobs.append(merge_job)

    return merge_jobs