def snp_and_indel_bcf(self):
    """
    Create the jobs that pipe `samtools.mpileup` into `samtools.bcftools_view`.

    The mpileup step receives the ".sorted.dup.recal.bam" file of every sample
    in `self.samples` at once, and the piped output is written as a BCF file
    under "variants/rawBCF".

    When the 'approxNbJobs' parameter of the 'snp_and_indel_bcf' config section
    is greater than 1, one job is created per region returned by
    `self.generate_approximate_windows()`; otherwise a single job without a
    region argument is created.

    Every job command is prefixed with a "mkdir -p" of the output directory.

    Returns:
        The list of created jobs.
    """
    recalibrated_bams = [
        os.path.join("alignment", sample.name, sample.name + ".sorted.dup.recal.bam")
        for sample in self.samples
    ]
    window_count = config.param('snp_and_indel_bcf', 'approxNbJobs', required=False, type='int')

    bcf_directory = "variants/rawBCF"
    view_options = "-bvcg"
    mkdir_prefix = "mkdir -p " + bcf_directory + " && \\\n"

    def pipe_into_bcf(pileup_job, bcf_name, job_name):
        # Shared tail of both code paths: pipe the mpileup job into
        # bcftools view, name the resulting job and make sure the output
        # directory is created before the pipeline runs.
        view_job = samtools.bcftools_view("-", os.path.join(bcf_directory, bcf_name), view_options)
        piped = pipe_jobs([pileup_job, view_job])
        piped.name = job_name
        piped.command = mkdir_prefix + piped.command
        return piped

    # Guard clause: no usable window count means one job with no region argument.
    if not (window_count and window_count > 1):
        whole_pileup = samtools.mpileup(
            recalibrated_bams,
            None,
            config.param('snp_and_indel_bcf', 'extra_mpileup_options')
        )
        return [pipe_into_bcf(whole_pileup, "allSamples.bcf", "snp_and_indel_bcf.allSamples")]

    jobs = []
    for region in self.generate_approximate_windows(window_count):
        region_pileup = samtools.mpileup(
            recalibrated_bams,
            None,
            config.param('snp_and_indel_bcf', 'extra_mpileup_options'),
            region
        )
        jobs.append(pipe_into_bcf(
            region_pileup,
            "allSamples." + region + ".bcf",
            # ":" is replaced by "_" in the job name only; the file name keeps the raw region.
            "snp_and_indel_bcf.allSamples." + re.sub(":", "_", region)
        ))
    return jobs
def rawmpileup(self):
    """
    Create one job per sample and per sequence that pipes `samtools.mpileup` into gzip.

    For every sample in `self.samples` and every entry of
    `self.sequence_dictionary`, the sample's ".sorted.dup.recal.bam" file is
    passed to `samtools.mpileup` together with the entry's 'name', and the
    output is compressed with "gzip -1" into
    alignment/<sample>/mpileup/<sample>.<sequence name>.mpileup.gz.

    Every job command is prefixed with a "mkdir -p" of the sample's mpileup
    directory.

    Returns:
        The list of created jobs.
    """
    jobs = []
    for sample in self.samples:
        mpileup_directory = os.path.join("alignment", sample.name, "mpileup")
        recalibrated_bam = os.path.join("alignment", sample.name, sample.name + ".sorted.dup.recal.bam")
        mkdir_prefix = "mkdir -p " + mpileup_directory + " && \\\n"

        for sequence in self.sequence_dictionary:
            sequence_name = sequence['name']
            compressed_pileup = os.path.join(
                mpileup_directory,
                sample.name + "." + sequence_name + ".mpileup.gz"
            )

            # The gzip job declares no input file: it only appears as the
            # second stage of the pipe built below.
            compressor = Job([], [compressed_pileup])
            compressor.command = "gzip -1 -c > " + compressed_pileup

            pileup = samtools.mpileup(
                [recalibrated_bam],
                None,
                config.param('rawmpileup', 'extra_mpileup_options'),
                sequence_name
            )

            piped = pipe_jobs([pileup, compressor])
            piped.command = mkdir_prefix + piped.command
            piped.name = "rawmpileup." + sample.name + "." + sequence_name
            jobs.append(piped)
    return jobs