def _run_recal_bam(dup_align_bam, recal_file, region, ref_file, out_file, config):
    """Produce ``out_file`` from ``dup_align_bam``, recalibrating when possible.

    Nothing is done if ``out_file`` already exists. When
    ``_recal_available(recal_file)`` is true, GATK ``PrintReads`` is run with
    ``recal_file`` as its ``-BQSR`` input, writing through a file transaction.
    The run is limited with ``-L`` to the result of ``subset_variant_regions``
    on the configured ``variant_regions``, falling back to ``region`` itself
    when that result is empty. Without usable recalibration data the input is
    handed to ``subset_bam_by_region`` (when ``region`` is set) or copied
    unchanged to ``out_file``.

    Returns ``out_file`` in every case.
    """
    if file_exists(out_file):
        return out_file

    if not _recal_available(recal_file):
        # No recalibration data: pass the reads through without GATK.
        if region:
            subset_bam_by_region(dup_align_bam, region, out_file)
        else:
            shutil.copy(dup_align_bam, out_file)
        return out_file

    gatk = broad.runner_from_config(config)
    with curdir_tmpdir() as work_dir:
        with file_transaction(out_file) as tx_bam:
            gatk_args = [
                "-T", "PrintReads",
                "-BQSR", recal_file,
                "-R", ref_file,
                "-I", dup_align_bam,
                "--out", tx_bam,
            ]
            variant_bed = config["algorithm"].get("variant_regions", None)
            # Prefer the region-specific subset of the variant regions; use
            # the plain region only when no subset is produced.
            target = subset_variant_regions(variant_bed, region, tx_bam) or region
            if target:
                gatk_args.extend(
                    ["-L", target, "--interval_set_rule", "INTERSECTION"])
            gatk.run_gatk(gatk_args, work_dir)
    return out_file
Esempio n. 2
0
def _run_recal_bam(dup_align_bam, recal_file, region, ref_file, out_file,
                   config):
    """Produce ``out_file`` from ``dup_align_bam``, recalibrating when possible.

    Nothing is done if ``out_file`` already exists. When
    ``_recal_available(recal_file)`` is true, GATK ``PrintReads`` is run with
    ``recal_file`` as its ``-BQSR`` input, writing through a file transaction.
    ``region`` and the configured ``variant_regions`` are each passed as a
    ``-L`` interval when set. Without usable recalibration data the input is
    handed to ``subset_bam_by_region`` (when ``region`` is set) or copied
    unchanged to ``out_file``.

    Returns ``out_file`` in every case.
    """
    if not file_exists(out_file):
        if _recal_available(recal_file):
            broad_runner = broad.runner_from_config(config)
            intervals = config["algorithm"].get("variant_regions", None)
            with curdir_tmpdir() as tmp_dir:
                with file_transaction(out_file) as tx_out_file:
                    params = [
                        "-T", "PrintReads",
                        "-BQSR", recal_file,
                        "-R", ref_file,
                        "-I", dup_align_bam,
                        "--out", tx_out_file,
                    ]
                    if region:
                        params += ["-L", region]
                    if intervals:
                        params += ["-L", intervals]
                    # Intersecting only makes sense when two interval inputs
                    # are supplied. The previous check tested ``params``,
                    # which is never empty, so the rule was added whenever
                    # ``intervals`` was set.
                    if region and intervals:
                        params += ["--interval_set_rule", "INTERSECTION"]
                    broad_runner.run_gatk(params, tmp_dir)
        elif region:
            subset_bam_by_region(dup_align_bam, region, out_file)
        else:
            shutil.copy(dup_align_bam, out_file)
    return out_file
Esempio n. 3
0
def gatk_realigner(align_bam, ref_file, config, dbsnp=None, region=None,
                   out_file=None, deep_coverage=False):
    """Realign a BAM file around indels using GATK.

    The input BAM and the reference are indexed through the Broad runner, and
    a ``.fai`` index is created with ``pysam.faidx`` when missing. If
    ``region`` is given, the BAM is first passed through
    ``subset_bam_by_region`` and the result is indexed as well.

    Returns the value of ``gatk_indel_realignment`` when
    ``has_aligned_reads`` reports reads to work on. Otherwise the working BAM
    is copied to ``out_file`` (returning ``out_file``) or, when no
    ``out_file`` was requested, returned as-is.
    """
    gatk = broad.runner_from_config(config)
    gatk.run_fn("picard_index", align_bam)
    gatk.run_fn("picard_index_ref", ref_file)

    fasta_index = "%s.fai" % ref_file
    if not os.path.exists(fasta_index):
        pysam.faidx(ref_file)

    work_bam = align_bam
    if region:
        work_bam = subset_bam_by_region(align_bam, region, out_file)
        gatk.run_fn("picard_index", work_bam)

    if not has_aligned_reads(work_bam, region):
        # Nothing to realign: hand back the (possibly subset) input.
        if out_file:
            shutil.copy(work_bam, out_file)
            return out_file
        return work_bam

    variant_regions = config["algorithm"].get("variant_regions", None)
    target_intervals = gatk_realigner_targets(
        gatk, work_bam, ref_file, dbsnp, region, out_file, deep_coverage,
        variant_regions)
    # Picard fixmate is not run afterwards: per the original note, GATK
    # releases after Feb 2011 handle this on the fly.
    return gatk_indel_realignment(
        gatk, work_bam, ref_file, target_intervals, region, out_file,
        deep_coverage)
Esempio n. 4
0
def gatk_realigner(align_bam,
                   ref_file,
                   config,
                   dbsnp=None,
                   region=None,
                   out_file=None,
                   deep_coverage=False):
    """Run GATK indel realignment on ``align_bam``.

    Steps, in order: index the BAM and the reference through the Broad
    runner, create the ``.fai`` index with ``pysam.faidx`` if it is absent,
    optionally restrict the BAM to ``region`` via ``subset_bam_by_region``
    (re-indexing the result), then compute realignment targets and realign.

    When ``has_aligned_reads`` is false for the working BAM, no realignment
    happens: the BAM is copied to ``out_file`` if one was given, and the path
    of whichever file holds the result is returned.
    """
    broad_runner = broad.runner_from_config(config)
    for index_fn, target in (("picard_index", align_bam),
                             ("picard_index_ref", ref_file)):
        broad_runner.run_fn(index_fn, target)
    if not os.path.exists("%s.fai" % ref_file):
        pysam.faidx(ref_file)

    bam_in = align_bam
    if region:
        bam_in = subset_bam_by_region(bam_in, region, out_file)
        broad_runner.run_fn("picard_index", bam_in)

    if has_aligned_reads(bam_in, region):
        variant_regions = config["algorithm"].get("variant_regions", None)
        targets = gatk_realigner_targets(broad_runner, bam_in, ref_file,
                                         dbsnp, region, out_file,
                                         deep_coverage, variant_regions)
        # A separate Picard fixmate pass is not needed; the original note
        # says GATK after Feb 2011 does it on the fly.
        return gatk_indel_realignment(broad_runner, bam_in, ref_file,
                                      targets, region, out_file,
                                      deep_coverage)

    # No aligned reads: pass the input through untouched.
    if not out_file:
        return bam_in
    shutil.copy(bam_in, out_file)
    return out_file