def _run_recal_bam(dup_align_bam, recal_file, region, ref_file, out_file, config):
    """Produce the recalibrated BAM ``out_file`` from ``dup_align_bam``.

    Nothing is done when ``out_file`` already exists. Otherwise:

    - if ``_recal_available(recal_file)`` is true, GATK ``PrintReads`` is run
      with ``-BQSR recal_file``, writing to the path yielded by
      ``file_transaction(out_file)``;
    - else, if ``region`` is set, the input is subset to that region via
      ``subset_bam_by_region``;
    - else the input BAM is copied to ``out_file`` unchanged.

    Returns ``out_file`` in every case.
    """
    # NOTE(review): a second definition of `_run_recal_bam` follows this one
    # in the file and replaces it at import time -- confirm which is intended.
    if file_exists(out_file):
        return out_file

    if not _recal_available(recal_file):
        # No recalibration data to apply: just materialize the output file.
        if region:
            subset_bam_by_region(dup_align_bam, region, out_file)
        else:
            shutil.copy(dup_align_bam, out_file)
        return out_file

    gatk = broad.runner_from_config(config)
    with curdir_tmpdir() as tmp_dir:
        with file_transaction(out_file) as tx_out_file:
            gatk_args = [
                "-T", "PrintReads",
                "-BQSR", recal_file,
                "-R", ref_file,
                "-I", dup_align_bam,
                "--out", tx_out_file,
            ]
            base_bed = config["algorithm"].get("variant_regions", None)
            region_bed = subset_variant_regions(base_bed, region, tx_out_file)
            # Prefer the subset BED when one was produced; otherwise fall
            # back to the raw region, if any.
            interval = region_bed or region
            if interval:
                gatk_args += ["-L", interval,
                              "--interval_set_rule", "INTERSECTION"]
            gatk.run_gatk(gatk_args, tmp_dir)
    return out_file
def _run_recal_bam(dup_align_bam, recal_file, region, ref_file, out_file, config):
    """Run BAM recalibration with the given input.

    Nothing is done when ``out_file`` already exists. Otherwise:

    - if ``_recal_available(recal_file)`` is true, GATK ``PrintReads`` is run
      with ``-BQSR recal_file``, restricted with ``-L`` to ``region`` and/or
      the configured ``variant_regions`` when they are set;
    - else, if ``region`` is set, the input is subset to that region via
      ``subset_bam_by_region``;
    - else the input BAM is copied to ``out_file`` unchanged.

    Returns ``out_file`` in every case.
    """
    if not file_exists(out_file):
        if _recal_available(recal_file):
            broad_runner = broad.runner_from_config(config)
            intervals = config["algorithm"].get("variant_regions", None)
            with curdir_tmpdir() as tmp_dir:
                with file_transaction(out_file) as tx_out_file:
                    params = [
                        "-T", "PrintReads",
                        "-BQSR", recal_file,
                        "-R", ref_file,
                        "-I", dup_align_bam,
                        "--out", tx_out_file,
                    ]
                    if region:
                        params += ["-L", region]
                    if intervals:
                        params += ["-L", intervals]
                    # The intersection rule is only meaningful when two -L
                    # inputs are combined. The previous test used `params`,
                    # which is always a non-empty list, instead of `region`.
                    if region and intervals:
                        params += ["--interval_set_rule", "INTERSECTION"]
                    broad_runner.run_gatk(params, tmp_dir)
        elif region:
            subset_bam_by_region(dup_align_bam, region, out_file)
        else:
            shutil.copy(dup_align_bam, out_file)
    return out_file
def gatk_realigner(align_bam, ref_file, config, dbsnp=None, region=None,
                   out_file=None, deep_coverage=False):
    """Realign a BAM file around indels using GATK, returning sorted BAM.

    The input BAM and the reference are indexed first (a ``.fai`` index is
    built with ``pysam.faidx`` when missing). When ``region`` is given, the
    BAM is subset to that region and the subset is indexed and used from
    then on.

    Returns the result of ``gatk_indel_realignment`` when
    ``has_aligned_reads`` reports reads. Otherwise the (possibly subset) BAM
    is copied to ``out_file`` and ``out_file`` is returned, or, with no
    ``out_file``, the (possibly subset) BAM path itself is returned.
    """
    gatk = broad.runner_from_config(config)
    gatk.run_fn("picard_index", align_bam)
    gatk.run_fn("picard_index_ref", ref_file)

    fasta_index = "%s.fai" % ref_file
    if not os.path.exists(fasta_index):
        pysam.faidx(ref_file)

    if region:
        align_bam = subset_bam_by_region(align_bam, region, out_file)
        gatk.run_fn("picard_index", align_bam)

    if not has_aligned_reads(align_bam, region):
        # Nothing to realign: hand back the input, copied to out_file if one
        # was requested.
        if out_file:
            shutil.copy(align_bam, out_file)
            return out_file
        return align_bam

    variant_regions = config["algorithm"].get("variant_regions", None)
    target_intervals = gatk_realigner_targets(
        gatk, align_bam, ref_file, dbsnp, region, out_file, deep_coverage,
        variant_regions)
    # A separate picard_fixmate pass is no longer required in recent GATK
    # (> Feb 2011) -- it is now done on the fly.
    return gatk_indel_realignment(
        gatk, align_bam, ref_file, target_intervals, region, out_file,
        deep_coverage)