def merge_postprocess_bams(inputs, output, tempdir, containers):
    """Merge BAMs, then sort, mark duplicates and index the result.

    Intermediate files (``merged_lanes.bam``, ``sorted.bam`` and
    ``markdups_metrics.txt``) are written under ``tempdir``. The final BAM
    is written to ``output`` and its index to ``output + '.bai'``.

    Args:
        inputs: input BAMs, forwarded unchanged to ``picardutils.merge_bams``.
        output: path of the duplicate-marked BAM to produce.
        tempdir: scratch directory, created through ``helpers.makedirs``.
        containers: mapping providing ``'picard'`` and ``'samtools'``
            entries; each is forwarded to the tool wrappers as
            ``docker_image``.
    """

    def _index(bam_path):
        # Index ``bam_path`` next to itself using the samtools image.
        bamutils.bam_index(
            bam_path,
            bam_path + '.bai',
            docker_image=containers['samtools'],
        )

    helpers.makedirs(tempdir)

    # All scratch artefacts live under tempdir.
    merged_bam = os.path.join(tempdir, 'merged_lanes.bam')
    sorted_path = os.path.join(tempdir, 'sorted.bam')
    dup_metrics = os.path.join(tempdir, 'markdups_metrics.txt')

    picard_image = containers['picard']

    picardutils.merge_bams(inputs, merged_bam, docker_image=picard_image)
    _index(merged_bam)

    picardutils.bam_sort(
        merged_bam, sorted_path, tempdir, docker_image=picard_image
    )
    picardutils.bam_markdups(
        sorted_path, output, dup_metrics, tempdir, docker_image=picard_image
    )
    _index(output)
def align_pe(fastq1, fastq2, output, reports, metrics, tempdir, reference,
             instrument, centre, sample_info, cell_id, lane_id, library_id,
             config):
    """Align one paired-end lane and write a sorted BAM plus flagstat metrics.

    The aligner is chosen by ``config["aligner"]`` (``"bwa-mem"`` or
    ``"bwa-aln"``). For ``"bwa-aln"`` the FASTQs are first passed through
    ``trim_fastqs`` unless ``instrument`` equals ``"N550"``. The raw
    alignment is written to ``temp_alignments.bam`` under ``tempdir``, sorted
    into ``output`` with ``picardutils.bam_sort``, and summarised into
    ``metrics`` with ``bamutils.bam_flagstat``.

    Args:
        fastq1, fastq2: paired FASTQ inputs.
        output: path of the sorted BAM to produce.
        reports: forwarded to ``run_fastqc``.
        metrics: path handed to ``bamutils.bam_flagstat``.
        tempdir: scratch directory for intermediate files.
        reference: reference passed to the bwa wrappers.
        instrument: sequencing instrument identifier; ``"N550"`` disables
            trimming on the bwa-aln path.
        centre, sample_info, cell_id, lane_id, library_id: forwarded to
            ``get_readgroup`` (``cell_id`` is also passed to ``trim_fastqs``).
        config: mapping with at least ``"aligner"`` and ``"containers"``.

    Raises:
        Exception: if ``config["aligner"]`` is neither ``"bwa-mem"`` nor
            ``"bwa-aln"``. This is checked before any work is started.
    """
    aligner = config["aligner"]

    # Fail fast: reject an unsupported aligner before FastQC is run, so a
    # configuration error does not cost a full QC pass first.
    if aligner not in ("bwa-mem", "bwa-aln"):
        raise Exception(
            "Aligner %s not supported, pipeline supports bwa-aln and bwa-mem"
            % aligner)

    readgroup = get_readgroup(lane_id, cell_id, library_id, centre, sample_info)

    run_fastqc(fastq1, fastq2, reports, tempdir, config)

    aln_temp = os.path.join(tempdir, "temp_alignments.bam")

    if aligner == "bwa-mem":
        bwa_mem_paired_end(fastq1, fastq2, aln_temp, reference, readgroup,
                           tempdir, config['containers'])
    else:
        # aligner == "bwa-aln" (validated above).
        if instrument != "N550":
            fastq1, fastq2 = trim_fastqs(fastq1, fastq2, cell_id, tempdir,
                                         config)
        bwa_aln_paired_end(fastq1, fastq2, aln_temp, tempdir, reference,
                           readgroup, config['containers'])

    container_ctx = helpers.get_container_ctx(
        config['containers'], 'picard', docker_only=True)
    picardutils.bam_sort(aln_temp, output, tempdir, **container_ctx)

    container_ctx = helpers.get_container_ctx(
        config['containers'], 'samtools', docker_only=True)
    bamutils.bam_flagstat(output, metrics, **container_ctx)
def align_pe(
        fastq1, fastq2, output, reports, metrics, tempdir, reference, trim,
        centre, sample_info, cell_id, lane_id, library_id, aligner,
        containers, adapter, adapter2, fastqscreen_params
):
    """Run FastQC, optionally trim, align with bwa, sort and flagstat one lane.

    Trimming through ``trim_fastqs`` happens only when ``aligner`` is
    ``"bwa-aln"`` and ``trim`` is truthy. The unsorted alignment is written
    to ``temp_alignments.bam`` under ``tempdir``; the sorted BAM goes to
    ``output`` and the flagstat report to ``metrics``.

    Args:
        fastq1, fastq2: paired FASTQ inputs.
        output: path of the sorted BAM to produce.
        reports: forwarded to ``run_fastqc``.
        metrics: path handed to ``bamutils.bam_flagstat``.
        tempdir: scratch directory for intermediate files.
        reference: reference passed to ``align_pe_with_bwa``.
        trim: enables adapter trimming on the bwa-aln path.
        centre, sample_info, cell_id, lane_id, library_id: forwarded to
            ``get_readgroup`` (``cell_id`` is also passed to ``trim_fastqs``).
        aligner: aligner name, forwarded to ``align_pe_with_bwa``.
        containers: mapping read for the ``'trimgalore'``, ``'picard'`` and
            ``'samtools'`` entries, and passed whole to ``run_fastqc`` and
            ``align_pe_with_bwa``.
        adapter, adapter2: adapter arguments for ``trim_fastqs``.
        fastqscreen_params: accepted but not used by this function body.
    """
    read_group = get_readgroup(
        lane_id, cell_id, library_id, centre, sample_info
    )

    # QC always runs on the untrimmed inputs.
    run_fastqc(fastq1, fastq2, reports, tempdir, containers)

    unsorted_bam = os.path.join(tempdir, "temp_alignments.bam")

    reads_r1, reads_r2 = fastq1, fastq2
    if aligner == "bwa-aln" and trim:
        reads_r1, reads_r2 = trim_fastqs(
            fastq1, fastq2, cell_id, tempdir, adapter, adapter2,
            containers['trimgalore']
        )

    align_pe_with_bwa(
        reads_r1, reads_r2, unsorted_bam, reference, read_group, tempdir,
        containers, aligner=aligner
    )

    picardutils.bam_sort(
        unsorted_bam, output, tempdir, docker_image=containers['picard']
    )
    bamutils.bam_flagstat(
        output, metrics, docker_image=containers['samtools']
    )
def align_pe(
        fastq1, fastq2, output, reports_dir, tempdir, reference, trim,
        centre, sample_info, cell_id, lane_id, library_id, aligner,
        containers, adapter, adapter2, fastqscreen_detailed_metrics,
        fastqscreen_summary_metrics, fastqscreen_params
):
    """Screen, QC, optionally trim, align, sort and flagstat one lane.

    Steps, in order:
      1. ``fastqscreen.organism_filter`` writes filtered FASTQs into a
         ``fastq_screen`` sub-directory of ``tempdir``.
      2. ``run_fastqc`` runs on the filtered FASTQs.
      3. If ``aligner`` is ``"bwa-aln"`` and ``trim`` is truthy, the filtered
         FASTQs are passed through ``trim_fastqs``.
      4. ``align_pe_with_bwa`` writes ``temp_alignments.bam`` under
         ``tempdir``.
      5. ``picardutils.bam_sort`` writes ``output``; ``bamutils.bam_flagstat``
         writes ``flagstat_metrics.txt`` under ``reports_dir``.

    Args:
        fastq1, fastq2: paired FASTQ inputs.
        output: path of the sorted BAM to produce.
        reports_dir: forwarded to ``run_fastqc``; also the directory that
            receives ``flagstat_metrics.txt``.
        tempdir: scratch directory for intermediate files.
        reference: passed to ``organism_filter`` and ``align_pe_with_bwa``.
        trim: enables adapter trimming on the bwa-aln path.
        centre, sample_info, cell_id, lane_id, library_id: forwarded to
            ``get_readgroup`` (``cell_id`` is also used by the screening and
            trimming calls).
        aligner: aligner name, forwarded to ``align_pe_with_bwa``.
        containers: mapping read for the ``'fastq_screen'``,
            ``'trimgalore'``, ``'picard'`` and ``'samtools'`` entries.
        adapter, adapter2: adapter arguments for ``trim_fastqs``.
        fastqscreen_detailed_metrics, fastqscreen_summary_metrics: output
            arguments forwarded to ``organism_filter``.
        fastqscreen_params: forwarded to ``organism_filter``; its
            ``'filter_contaminated_reads'`` entry is also passed explicitly.
    """
    screen_dir = os.path.join(tempdir, 'fastq_screen')
    helpers.makedirs(screen_dir)

    screened_r1 = os.path.join(screen_dir, "fastq_r1.fastq.gz")
    screened_r2 = os.path.join(screen_dir, "fastq_r2.fastq.gz")

    fastqscreen.organism_filter(
        fastq1,
        fastq2,
        screened_r1,
        screened_r2,
        fastqscreen_detailed_metrics,
        fastqscreen_summary_metrics,
        screen_dir,
        cell_id,
        fastqscreen_params,
        reference,
        docker_image=containers['fastq_screen'],
        filter_contaminated_reads=fastqscreen_params['filter_contaminated_reads'],
    )

    read_group = get_readgroup(
        lane_id, cell_id, library_id, centre, sample_info
    )

    # QC runs on the screened reads, before any trimming.
    run_fastqc(screened_r1, screened_r2, reports_dir, tempdir, containers)

    unsorted_bam = os.path.join(tempdir, "temp_alignments.bam")

    reads_r1, reads_r2 = screened_r1, screened_r2
    if aligner == "bwa-aln" and trim:
        reads_r1, reads_r2 = trim_fastqs(
            screened_r1, screened_r2, cell_id, tempdir, adapter, adapter2,
            containers['trimgalore']
        )

    align_pe_with_bwa(
        reads_r1, reads_r2, unsorted_bam, reference, read_group, tempdir,
        containers, aligner=aligner
    )

    picardutils.bam_sort(
        unsorted_bam, output, tempdir, docker_image=containers['picard']
    )

    flagstat_report = os.path.join(reports_dir, 'flagstat_metrics.txt')
    bamutils.bam_flagstat(
        output, flagstat_report, docker_image=containers['samtools']
    )
def merge_postprocess_bams(inputs, output, tempdir):
    """Merge BAMs, then sort, mark duplicates and index the result.

    Intermediate files (``merged_lanes.bam``, ``sorted.bam`` and
    ``markdups_metrics.txt``) are written under ``tempdir``. The final BAM
    is written to ``output`` and its index to ``output + '.bai'``.

    Args:
        inputs: input BAMs, forwarded unchanged to ``picardutils.merge_bams``.
        output: path of the duplicate-marked BAM to produce.
        tempdir: scratch directory, created through ``helpers.makedirs``.
    """
    helpers.makedirs(tempdir)

    # Scratch artefacts, all under tempdir.
    merged_bam = os.path.join(tempdir, 'merged_lanes.bam')
    sorted_path = os.path.join(tempdir, 'sorted.bam')
    dup_metrics = os.path.join(tempdir, 'markdups_metrics.txt')

    # Merge the inputs and index the merged file.
    picardutils.merge_bams(inputs, merged_bam)
    bamutils.bam_index(merged_bam, merged_bam + '.bai')

    # Sort, mark duplicates into the final output, then index it.
    picardutils.bam_sort(merged_bam, sorted_path, tempdir)
    picardutils.bam_markdups(sorted_path, output, dup_metrics, tempdir)
    bamutils.bam_index(output, output + '.bai')
def postprocess_bam(infile, outfile, tempdir, containers):
    """Sort a BAM, mark duplicates and index the result.

    ``sorted.bam`` and ``markdups_metrics.txt`` are written under
    ``tempdir``. The duplicate-marked BAM goes to ``outfile`` and its index
    to ``outfile + '.bai'``.

    Args:
        infile: BAM to post-process.
        outfile: path of the duplicate-marked BAM to produce.
        tempdir: scratch directory; created via ``helpers.makedirs`` when it
            does not already exist.
        containers: mapping providing ``'picard'`` and ``'samtools'``
            entries; each is forwarded to the tool wrappers as
            ``docker_image``.
    """
    index_path = outfile + '.bai'

    tempdir_missing = not os.path.exists(tempdir)
    if tempdir_missing:
        helpers.makedirs(tempdir)

    sorted_path = os.path.join(tempdir, 'sorted.bam')
    picardutils.bam_sort(
        infile,
        sorted_path,
        tempdir,
        docker_image=containers['picard'],
    )

    dup_metrics = os.path.join(tempdir, 'markdups_metrics.txt')
    picardutils.bam_markdups(
        sorted_path,
        outfile,
        dup_metrics,
        tempdir,
        docker_image=containers['picard'],
    )

    bamutils.bam_index(
        outfile,
        index_path,
        docker_image=containers['samtools'],
    )
def postprocess_bam(infile, outfile, outfile_index, tempdir, config,
                    markdups_metrics, flagstat_metrics):
    """Sort a BAM, mark duplicates, then index it and collect flagstat.

    ``sorted.bam`` is written under ``tempdir``. The duplicate-marked BAM
    goes to ``outfile``, its index to ``outfile_index``, and the two reports
    to ``markdups_metrics`` and ``flagstat_metrics``.

    Args:
        infile: BAM to post-process.
        outfile: path of the duplicate-marked BAM to produce.
        outfile_index: path of the index written for ``outfile``.
        tempdir: scratch directory; created via ``helpers.makedirs`` when it
            does not already exist.
        config: mapping whose ``'containers'`` entry is handed to
            ``helpers.get_container_ctx``.
        markdups_metrics: metrics path passed to ``picardutils.bam_markdups``.
        flagstat_metrics: output path passed to ``bamutils.bam_flagstat``.
    """
    tempdir_missing = not os.path.exists(tempdir)
    if tempdir_missing:
        helpers.makedirs(tempdir)

    # Picard stage: sort, then mark duplicates.
    picard_ctx = helpers.get_container_ctx(
        config['containers'], 'picard', docker_only=True
    )
    sorted_path = os.path.join(tempdir, 'sorted.bam')
    picardutils.bam_sort(infile, sorted_path, tempdir, **picard_ctx)
    picardutils.bam_markdups(
        sorted_path, outfile, markdups_metrics, tempdir, **picard_ctx
    )

    # Samtools stage: index and flagstat on the final BAM.
    samtools_ctx = helpers.get_container_ctx(
        config['containers'], 'samtools', docker_only=True
    )
    bamutils.bam_index(outfile, outfile_index, **samtools_ctx)
    bamutils.bam_flagstat(outfile, flagstat_metrics, **samtools_ctx)
def align_pe(
        fastq1, fastq2, output, reports_dir, tempdir, reference, trim,
        center, sample_info, cell_id, lane_id, library_id, adapter,
        adapter2, fastqscreen_detailed_metrics, fastqscreen_summary_metrics,
        fastqscreen_params,
):
    """Screen, optionally trim, align, sort and flagstat one lane.

    Steps, in order:
      1. ``fastqscreen.organism_filter`` writes filtered FASTQs into a
         ``fastq_screen`` sub-directory of ``tempdir``.
      2. If ``trim`` is truthy, the filtered FASTQs are passed through
         ``trim_fastqs``.
      3. ``align_pe_with_bwa`` writes ``temp_alignments.bam`` under
         ``tempdir``.
      4. ``picardutils.bam_sort`` writes ``output``; ``bamutils.bam_flagstat``
         writes ``flagstat_metrics.txt`` under ``reports_dir``.

    Args:
        fastq1, fastq2: paired FASTQ inputs.
        output: path of the sorted BAM to produce.
        reports_dir: directory that receives ``flagstat_metrics.txt``.
        tempdir: scratch directory for intermediate files.
        reference: reference passed to ``align_pe_with_bwa``.
        trim: enables adapter trimming before alignment.
        center, sample_info, cell_id, lane_id, library_id: forwarded to
            ``get_readgroup`` (``cell_id`` is also used by the screening and
            trimming calls).
        adapter, adapter2: adapter arguments for ``trim_fastqs``.
        fastqscreen_detailed_metrics, fastqscreen_summary_metrics: output
            arguments forwarded to ``organism_filter``.
        fastqscreen_params: forwarded to ``organism_filter``.
    """
    screen_dir = os.path.join(tempdir, 'fastq_screen')
    helpers.makedirs(screen_dir)

    screened_r1 = os.path.join(screen_dir, "fastq_r1.fastq.gz")
    screened_r2 = os.path.join(screen_dir, "fastq_r2.fastq.gz")

    fastqscreen.organism_filter(
        fastq1,
        fastq2,
        screened_r1,
        screened_r2,
        fastqscreen_detailed_metrics,
        fastqscreen_summary_metrics,
        screen_dir,
        cell_id,
        fastqscreen_params,
    )

    read_group = get_readgroup(
        lane_id, cell_id, library_id, center, sample_info
    )

    # NOTE: run_fastqc is not invoked in this variant; the call was
    # commented out in the original code.

    unsorted_bam = os.path.join(tempdir, "temp_alignments.bam")

    reads_r1, reads_r2 = screened_r1, screened_r2
    if trim:
        reads_r1, reads_r2 = trim_fastqs(
            screened_r1, screened_r2, cell_id, tempdir, adapter, adapter2
        )

    align_pe_with_bwa(
        reads_r1, reads_r2, unsorted_bam, reference, read_group, tempdir
    )

    picardutils.bam_sort(unsorted_bam, output, tempdir)

    flagstat_report = os.path.join(reports_dir, 'flagstat_metrics.txt')
    bamutils.bam_flagstat(output, flagstat_report)