def run_mutect_one_job(tempdir, vcf, reference, intervals, normal_bam, tumour_bam):
    '''
    Run mutect and its filter step for every interval, then merge the
    filtered per-interval VCFs into a single file.

    :param tempdir: working directory; one numbered sub-directory is created
        per interval, plus directories for gnu parallel and for the merge
    :param vcf: path of the merged output VCF
    :param reference: reference passed to the mutect run and filter commands
    :param intervals: sequence of intervals, one mutect call per entry
    :param normal_bam: path to normal bam
    :param tumour_bam: path to tumour bam
    '''
    run_commands = []
    filter_commands = []
    filtered_vcfs = []

    for i, interval in enumerate(intervals):
        ival_temp_dir = os.path.join(tempdir, str(i))
        helpers.makedirs(ival_temp_dir)

        # Raw and filtered calls need distinct files: the filter command takes
        # the raw calls as input and must not overwrite them while reading.
        unfiltered_output = os.path.join(ival_temp_dir, 'mutect_unfiltered.vcf.gz')
        output = os.path.join(ival_temp_dir, 'mutect.vcf.gz')

        run_commands.append(
            mutect_run_command(reference, interval, normal_bam, tumour_bam, unfiltered_output)
        )
        filter_commands.append(
            mutect_filter_command(reference, unfiltered_output, output)
        )
        filtered_vcfs.append(output)

    # Two separate batches: every filter command depends on the output of its
    # run command, so the filters are only submitted once all runs are done
    # (a single mixed batch gives no such ordering guarantee).
    helpers.run_in_gnu_parallel(
        run_commands, os.path.join(tempdir, 'gnu_parallel_temp')
    )
    helpers.run_in_gnu_parallel(
        filter_commands, os.path.join(tempdir, 'gnu_parallel_temp_filter')
    )

    merge_tempdir = os.path.join(tempdir, 'mutect_merge')
    helpers.makedirs(merge_tempdir)
    merge_vcfs(filtered_vcfs, vcf, merge_tempdir)
def run_samtools_germline_one_job(
        tempdir, vcf, reference, intervals, bam_file,
        samtools_docker_image=None, vcftools_docker_image=None
):
    '''
    Build one samtools germline command per interval, run them all through
    gnu parallel and merge the per-interval VCFs.

    :param tempdir: working directory; one numbered sub-directory is created
        per interval, plus directories for gnu parallel and for the merge
    :param vcf: path of the merged output VCF
    :param reference: reference passed to the samtools germline command
    :param intervals: sequence of intervals, one command per entry
    :param bam_file: path to the bam file to call on
    :param samtools_docker_image: forwarded to helpers.run_in_gnu_parallel
    :param vcftools_docker_image: forwarded to merge_vcfs as docker_image
    '''
    germline_commands = []
    interval_vcfs = []

    for idx, region in enumerate(intervals):
        region_dir = os.path.join(tempdir, str(idx))
        helpers.makedirs(region_dir)

        region_vcf = os.path.join(region_dir, 'germline.vcf.gz')
        interval_vcfs.append(region_vcf)
        germline_commands.append(
            samtools_germline_command(region_vcf, reference, region, bam_file)
        )

    helpers.run_in_gnu_parallel(
        germline_commands,
        os.path.join(tempdir, 'gnu_parallel_temp'),
        samtools_docker_image,
    )

    merge_dir = os.path.join(tempdir, 'germline_merge')
    helpers.makedirs(merge_dir)
    merge_vcfs(interval_vcfs, vcf, merge_dir, docker_image=vcftools_docker_image)
def run_samtools_germline_one_job(tempdir, vcf, reference, intervals, bam_file):
    '''
    Build one samtools germline command per interval, run them all through
    gnu parallel, merge the per-interval VCFs and rewrite the sample id in
    the merged header.

    NOTE(review): another function with this same name is defined earlier in
    this file; being the later definition, this one is what the module ends
    up exposing. Confirm that is intended.

    :param tempdir: working directory; one numbered sub-directory is created
        per interval, plus directories for gnu parallel and for the merge
    :param vcf: path of the final VCF written by the header update
    :param reference: reference passed to the samtools germline command
    :param intervals: sequence of intervals, one command per entry
    :param bam_file: path to the bam file to call on; its sample id is used
        for the header update
    '''
    germline_commands = []
    interval_vcfs = []

    for idx, region in enumerate(intervals):
        region_dir = os.path.join(tempdir, str(idx))
        helpers.makedirs(region_dir)

        region_vcf = os.path.join(region_dir, 'germline.vcf.gz')
        interval_vcfs.append(region_vcf)
        germline_commands.append(
            samtools_germline_command(region_vcf, reference, region, bam_file)
        )

    helpers.run_in_gnu_parallel(
        germline_commands, os.path.join(tempdir, 'gnu_parallel_temp')
    )

    merge_dir = os.path.join(tempdir, 'germline_merge')
    helpers.makedirs(merge_dir)

    # merge into an intermediate file first; the header update writes `vcf`
    merged_vcf = os.path.join(merge_dir, 'merged_rtg.vcf')
    merge_vcfs(interval_vcfs, merged_vcf, merge_dir)

    sample_id = bamutils.get_sample_id(bam_file)
    vcfutils.update_germline_header_sample_ids(merged_vcf, vcf, sample_id)
def run_museq_one_job(
        tempdir, museq_vcf, reference, intervals, museq_params,
        tumour_bam=None, normal_bam=None, titan_mode=False
):
    '''
    Run museq on every interval through gnu parallel, merge the per-interval
    VCFs and rewrite the sample ids in the merged header.

    NOTE(review): tumour_bam and normal_bam default to None, yet both are
    passed to get_sample_id unconditionally -- confirm that helper copes
    with None.

    :param tempdir: working directory; one numbered sub-directory is created
        per interval, plus directories for gnu parallel and for the merge
    :param museq_vcf: path of the final VCF written by the header update
    :param reference: reference passed to run_museq
    :param intervals: sequence of intervals, one museq command per entry
    :param museq_params: passed through to run_museq
    :param tumour_bam: path to tumour bam
    :param normal_bam: path to normal bam
    :param titan_mode: passed through to run_museq
    '''
    museq_commands = []
    interval_vcfs = []

    for idx, region in enumerate(intervals):
        region_dir = os.path.join(tempdir, str(idx))
        helpers.makedirs(region_dir)

        region_vcf = os.path.join(region_dir, 'museq.vcf')
        region_log = os.path.join(region_dir, 'museq.log')
        interval_vcfs.append(region_vcf)

        # return_cmd=True: the returned value is queued for gnu parallel
        # below rather than being treated as a finished run
        museq_commands.append(
            run_museq(
                region_vcf, region_log, reference, region, museq_params,
                region_dir, tumour_bam=tumour_bam, normal_bam=normal_bam,
                return_cmd=True, titan_mode=titan_mode
            )
        )

    helpers.run_in_gnu_parallel(
        museq_commands, os.path.join(tempdir, 'gnu_parallel_temp')
    )

    merge_dir = os.path.join(tempdir, 'museq_merge')
    helpers.makedirs(merge_dir)

    # merge into an intermediate file first; the header update writes museq_vcf
    merged_vcf = os.path.join(merge_dir, 'temp_museq_merge.vcf')
    merge_vcfs(interval_vcfs, merged_vcf, merge_dir)

    tumour_id = get_sample_id(tumour_bam)
    normal_id = get_sample_id(normal_bam)
    update_header_sample_ids(merged_vcf, museq_vcf, tumour_id, normal_id)
def strelka_one_node(
        normal_bam_file,
        tumour_bam_file,
        ref_genome_fasta_file,
        indel_file,
        snv_file,
        tmp_dir,
        regions,
        known_sizes,
        is_exome=False,
):
    '''
    Run the strelka steps on a single node: per-chromosome depths of the
    normal bam, per-region genome segment calls, then concatenation of the
    per-region SNV and indel VCFs and a sample id update of their headers.

    :param normal_bam_file: path to normal bam
    :param tumour_bam_file: path to tumour bam
    :param ref_genome_fasta_file: reference passed to genome_segment_cmd
    :param indel_file: path of the final indel VCF
    :param snv_file: path of the final SNV VCF
    :param tmp_dir: working directory for all intermediate files
    :param regions: sequence of region strings; the text before the first
        underscore of each entry is used as its chromosome name
    :param known_sizes: passed through to genome_segment_cmd
    :param is_exome: passed through to genome_segment_cmd
    '''
    # Several regions can share a chromosome. Keep each chromosome once (in
    # first-seen order) so the same depth job is not queued repeatedly against
    # one output file and the depth file is not merged more than once.
    chromosomes = []
    seen_chromosomes = set()
    for region in regions:
        chrom = region.split('_')[0]
        if chrom not in seen_chromosomes:
            seen_chromosomes.add(chrom)
            chromosomes.append(chrom)

    depth_commands = []
    depthfiles = []
    for chrom in chromosomes:
        chrom_temp_dir = os.path.join(tmp_dir, 'chroms', str(chrom))
        helpers.makedirs(chrom_temp_dir)

        outfile = os.path.join(chrom_temp_dir, 'depth.txt')
        depthfiles.append(outfile)
        depth_commands.append([
            'GetChromDepth',
            '--align-file', normal_bam_file,
            '--chrom', chrom,
            '--output-file', outfile,
        ])

    parallel_temp_dir = os.path.join(tmp_dir, 'gnu_parallel_temp_depths')
    helpers.run_in_gnu_parallel(depth_commands, parallel_temp_dir)

    depth_file = os.path.join(tmp_dir, 'chrom_depths.txt')
    merge_chromosome_depths_plain(depthfiles, depth_file)

    segment_commands = []
    indel_files = []
    snv_files = []
    for i, region in enumerate(regions):
        ival_temp_dir = os.path.join(tmp_dir, 'intervals', str(i))
        helpers.makedirs(ival_temp_dir)

        indel_out = os.path.join(ival_temp_dir, 'strelka_indel.vcf')
        snv_out = os.path.join(ival_temp_dir, 'strelka_snv.vcf')
        stats_out = os.path.join(ival_temp_dir, 'stats.txt')
        indel_files.append(indel_out)
        snv_files.append(snv_out)

        segment_commands.append(
            genome_segment_cmd(
                depth_file, normal_bam_file, tumour_bam_file,
                ref_genome_fasta_file, indel_out, snv_out, stats_out,
                region, known_sizes, is_exome=is_exome,
            )
        )

    parallel_temp_dir = os.path.join(tmp_dir, 'gnu_parallel_temp')
    helpers.run_in_gnu_parallel(segment_commands, parallel_temp_dir)

    # SNVs and indels each get their own merge directory, created up front so
    # the intermediate merged file has somewhere to be written.
    snv_merge_dir = os.path.join(tmp_dir, 'snv_merge')
    helpers.makedirs(snv_merge_dir)
    temp_strelka_snv = os.path.join(snv_merge_dir, 'temp_strelka_merge_snv.vcf')
    concatenate_vcf(snv_files, temp_strelka_snv, snv_merge_dir)

    indel_merge_dir = os.path.join(tmp_dir, 'indel_merge')
    helpers.makedirs(indel_merge_dir)
    temp_strelka_indel = os.path.join(indel_merge_dir, 'temp_strelka_merge_indel.vcf')
    concatenate_vcf(indel_files, temp_strelka_indel, indel_merge_dir)

    tumour_id = get_sample_id(tumour_bam_file)
    normal_id = get_sample_id(normal_bam_file)

    update_header_sample_ids(temp_strelka_snv, snv_file, tumour_id, normal_id)
    update_header_sample_ids(temp_strelka_indel, indel_file, tumour_id, normal_id)