def piped_bamprep(data, region=None, out_file=None):
    """Run piped BAM preparation for a single region of a sample.

    The region is stored in ``data["region"]`` before anything else. When
    ``_need_prep`` reports that the sample needs no preparation, the sample
    is returned at that point. Otherwise a prepared BAM is produced at
    ``out_file``, indexed with ``bam.index`` and recorded as
    ``data["work_bam"]``.

    Args:
        data: Sample dictionary; ``work_bam`` and ``config`` are read here.
        region: Indexable region description. A first element of
            ``"nochrom"`` or ``"noanalysis"`` selects the matching writer in
            ``shared``; anything else goes through ``_piped_bamprep_region``.
        out_file: Path the prepared BAM is written to.

    Returns:
        A one-element list holding the updated ``data``.
    """
    data["region"] = region
    if not _need_prep(data):
        return [data]

    utils.safe_makedir(os.path.dirname(out_file))
    region_kind = region[0]
    if region_kind == "nochrom":
        prepared = shared.write_nochr_reads(data["work_bam"], out_file, data["config"])
    elif region_kind == "noanalysis":
        prepared = shared.write_noanalysis_reads(
            data["work_bam"], region[1], out_file, data["config"])
    else:
        # An existing output is reused rather than regenerated.
        if not utils.file_exists(out_file):
            with tx_tmpdir(data) as scratch_dir:
                _piped_bamprep_region(data, region, out_file, scratch_dir)
        prepared = out_file

    bam.index(prepared, data["config"])
    data["work_bam"] = prepared
    return [data]
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration: rewrite the BAM using the covariates.

    Args:
        data: Sample dictionary; reads ``config``, ``work_bam``, ``name``,
            ``prep_recal`` and ``sam_ref``.
        region: Region to process. The literal ``"nochr"`` routes the input
            through ``write_nochr_reads`` instead of ``_run_recal_bam``.
        out_file: Output path. Defaults to the input BAM path with its
            extension replaced by ``-gatkrecal.bam``.

    Returns:
        A one-element list holding ``data`` with ``work_bam`` pointing at the
        recalibrated BAM.
    """
    config = data["config"]
    if out_file is None:
        stem = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % stem
    message = "Writing recalibrated BAM for %s to %s" % (data["name"], out_file)
    logger.info(message)

    if region == "nochr":
        recal_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        recal_bam = _run_recal_bam(
            data["work_bam"], data["prep_recal"], region, data["sam_ref"], out_file, config)

    # Binning after recalibration is requested by True, "postrecal", or a
    # list that contains "postrecal".
    qual_bin = config["algorithm"].get("quality_bin", None)
    bin_after_recal = (
        qual_bin is True
        or qual_bin == "postrecal"
        or (isinstance(qual_bin, list) and "postrecal" in qual_bin)
    )
    if bin_after_recal and has_aligned_reads(recal_bam):
        binned = cram.illumina_qual_bin(
            recal_bam, data["sam_ref"], os.path.dirname(recal_bam), config)
        # Move the unbinned file aside, then put the binned file in its place.
        unbinned_copy = recal_bam + ".binned"
        shutil.move(recal_bam, unbinned_copy)
        shutil.move(binned, recal_bam)
        utils.save_diskspace(unbinned_copy, "Quality binned to %s" % recal_bam, config)

    data["work_bam"] = recal_bam
    return [data]
def realign_sample(data, region=None, out_file=None):
    """Realign a sample BAM file at indels with the configured realigner.

    The ``realign`` algorithm setting picks the approach: ``True`` (the
    default) means ``"gatk"``, and a falsy value skips realignment. The
    chosen name is looked up in ``_realign_approaches``.

    Args:
        data: Sample dictionary; reads ``config``, ``name``, ``work_bam``,
            ``sam_ref`` and, for regular regions, the dbSNP entry under
            ``genome_resources``.
        region: Region to realign. ``"nochr"`` uses ``write_nochr_reads``.
            ``None`` additionally passes the input BAM to ``save_diskspace``.
        out_file: Output path handed to the realignment function.

    Returns:
        A one-element list holding ``data``. ``work_bam`` is updated only
        when realignment ran.
    """
    config = data["config"]
    approach = config["algorithm"].get("realign", True)
    if approach is True:
        approach = "gatk"
    if not approach:
        return [data]
    realign_fn = _realign_approaches[approach]
    if not realign_fn:
        return [data]

    sample_name = data["name"]
    input_bam = data["work_bam"]
    logger.info("Realigning %s with %s: %s %s" % (
        sample_name, approach, os.path.basename(input_bam), region))
    sam_ref = data["sam_ref"]

    if region == "nochr":
        realigned = write_nochr_reads(input_bam, out_file, config)
    else:
        dbsnp = data["genome_resources"]["variation"]["dbsnp"]
        realigned = realign_fn(input_bam, sam_ref, config, dbsnp, region, out_file)

    if region is None:
        save_diskspace(input_bam, "Realigned to %s" % realigned, config)
    data["work_bam"] = realigned
    return [data]
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration: rewrite the BAM using the covariates.

    Args:
        data: Sample dictionary; reads ``config``, ``work_bam``, ``name``,
            ``prep_recal`` and ``sam_ref``.
        region: Region to process. The literal ``"nochr"`` routes the input
            through ``write_nochr_reads`` instead of ``_run_recal_bam``.
        out_file: Output path. Defaults to the input BAM path with its
            extension replaced by ``-gatkrecal.bam``.

    Returns:
        A one-element list holding ``data`` with ``work_bam`` set to the BAM
        that was written.
    """
    config = data["config"]
    if out_file is None:
        stem = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % stem
    message = "Writing recalibrated BAM for %s to %s" % (data["name"], out_file)
    logger.info(message)

    if region == "nochr":
        data["work_bam"] = write_nochr_reads(data["work_bam"], out_file)
    else:
        data["work_bam"] = _run_recal_bam(
            data["work_bam"], data["prep_recal"], region, data["sam_ref"], out_file, config)
    return [data]
def realign_sample(data, region=None, out_file=None):
    """Realign a sample BAM file at indels using GATK.

    The realignment message is always logged. The work itself runs only when
    the ``snpcall`` algorithm setting is truthy.

    Args:
        data: Sample dictionary; reads ``name``, ``work_bam``, ``config`` and
            ``sam_ref``.
        region: Region to realign. ``"nochr"`` uses ``write_nochr_reads``.
            ``None`` additionally passes the input BAM to ``save_diskspace``.
        out_file: Output path handed to the realignment step.

    Returns:
        A one-element list holding ``data``. ``work_bam`` is updated only
        when realignment ran.
    """
    sample_name = data["name"]
    input_bam = data["work_bam"]
    logger.info("Realigning %s with GATK: %s %s" % (
        sample_name, os.path.basename(input_bam), region))

    if not data["config"]["algorithm"]["snpcall"]:
        return [data]

    sam_ref = data["sam_ref"]
    config = data["config"]
    if region == "nochr":
        realigned = write_nochr_reads(input_bam, out_file)
    else:
        dbsnp = configured_ref_file("dbsnp", config, sam_ref)
        realigned = gatk_realigner(input_bam, sam_ref, config, dbsnp, region, out_file)

    if region is None:
        save_diskspace(input_bam, "Realigned to %s" % realigned, config)
    data["work_bam"] = realigned
    return [data]
def piped_bamprep(data, region=None, out_file=None):
    """Run piped BAM preparation for a single region of a sample.

    Handles de-duplication, recalibration and realignment of original BAMs
    through pipes, avoiding intermediate disk IO.

    Args:
        data: Sample dictionary; reads ``work_bam`` and updates ``work_bam``
            and ``regions["current"]``.
        region: Indexable region description. A first element of
            ``"nochrom"`` or ``"noanalysis"`` selects the matching writer in
            ``shared``; anything else goes through ``_piped_bamprep_region``.
        out_file: Path the prepared BAM is written to.

    Returns:
        A one-element list holding the updated ``data``.
    """
    region_kind = region[0]
    if region_kind == "nochrom":
        prepared = shared.write_nochr_reads(data["work_bam"], out_file)
    elif region_kind == "noanalysis":
        prepared = shared.write_noanalysis_reads(data["work_bam"], region[1], out_file)
    else:
        # An existing output is reused rather than regenerated.
        if not utils.file_exists(out_file):
            with utils.curdir_tmpdir() as scratch_dir:
                _piped_bamprep_region(data, region, out_file, scratch_dir)
        prepared = out_file

    data["work_bam"] = prepared
    data["regions"]["current"] = region
    return [data]
def piped_bamprep(data, region=None, out_file=None):
    """Run piped BAM preparation for a single region of a sample.

    Handles de-duplication, recalibration and realignment of original BAMs
    through pipes, avoiding intermediate disk IO. The resulting BAM is
    indexed through the broad runner's ``picard_index`` function.

    Args:
        data: Sample dictionary; reads ``work_bam`` and ``config`` and
            updates ``work_bam`` and ``region``.
        region: Indexable region description. A first element of
            ``"nochrom"`` or ``"noanalysis"`` selects the matching writer in
            ``shared``; anything else goes through ``_piped_bamprep_region``.
        out_file: Path the prepared BAM is written to; its directory is
            created first.

    Returns:
        A one-element list holding the updated ``data``.
    """
    utils.safe_makedir(os.path.dirname(out_file))

    region_kind = region[0]
    if region_kind == "nochrom":
        prepared = shared.write_nochr_reads(data["work_bam"], out_file)
    elif region_kind == "noanalysis":
        prepared = shared.write_noanalysis_reads(data["work_bam"], region[1], out_file)
    else:
        # An existing output is reused rather than regenerated.
        if not utils.file_exists(out_file):
            with utils.curdir_tmpdir() as scratch_dir:
                _piped_bamprep_region(data, region, out_file, scratch_dir)
        prepared = out_file

    broad.runner_from_config(data["config"]).run_fn("picard_index", prepared)
    data["work_bam"] = prepared
    data["region"] = region
    return [data]
def realign_sample(data, region=None, out_file=None):
    """Realign a sample BAM file at indels using GATK.

    Logging happens unconditionally. Realignment is performed only if the
    ``snpcall`` entry of the algorithm configuration is truthy.

    Args:
        data: Sample dictionary; reads ``name``, ``work_bam``, ``config`` and
            ``sam_ref``.
        region: Region to realign. ``"nochr"`` uses ``write_nochr_reads``.
            ``None`` additionally passes the input BAM to ``save_diskspace``.
        out_file: Output path handed to the realignment step.

    Returns:
        A one-element list holding ``data``. ``work_bam`` is replaced only
        when realignment was performed.
    """
    log_args = (data["name"], os.path.basename(data["work_bam"]), region)
    logger.info("Realigning %s with GATK: %s %s" % log_args)

    snpcall_enabled = data["config"]["algorithm"]["snpcall"]
    if snpcall_enabled:
        sam_ref = data["sam_ref"]
        config = data["config"]
        original_bam = data["work_bam"]
        if region == "nochr":
            result_bam = write_nochr_reads(original_bam, out_file)
        else:
            result_bam = gatk_realigner(
                original_bam,
                sam_ref,
                config,
                configured_ref_file("dbsnp", config, sam_ref),
                region,
                out_file,
            )
        if region is None:
            save_diskspace(original_bam, "Realigned to %s" % result_bam, config)
        data["work_bam"] = result_bam
    return [data]
def realign_sample(data, region=None, out_file=None):
    """Realign a sample BAM file at indels with the configured realigner.

    The ``realign`` algorithm setting picks the approach: ``True`` (the
    default) means ``"gatk"``, and a falsy value skips realignment. The
    chosen name is looked up in ``_realign_approaches``.

    Args:
        data: Sample dictionary; reads ``config``, ``name``, ``work_bam``,
            ``sam_ref`` and, for regular regions, the dbSNP entry under
            ``genome_resources``.
        region: Region to realign. ``"nochr"`` uses ``write_nochr_reads``.
            ``None`` additionally passes the input BAM to ``save_diskspace``.
        out_file: Output path handed to the realignment function.

    Returns:
        A one-element list holding ``data``. ``work_bam`` is replaced only
        when realignment was performed.
    """
    config = data["config"]
    approach = config["algorithm"].get("realign", True)
    if approach is True:
        approach = "gatk"
    realign_fn = _realign_approaches[approach] if approach else None

    if realign_fn:
        log_args = (data["name"], approach, os.path.basename(data["work_bam"]), region)
        logger.info("Realigning %s with %s: %s %s" % log_args)
        sam_ref = data["sam_ref"]
        original_bam = data["work_bam"]
        if region == "nochr":
            result_bam = write_nochr_reads(original_bam, out_file)
        else:
            result_bam = realign_fn(
                original_bam,
                sam_ref,
                config,
                data["genome_resources"]["variation"]["dbsnp"],
                region,
                out_file,
            )
        if region is None:
            save_diskspace(original_bam, "Realigned to %s" % result_bam, config)
        data["work_bam"] = result_bam
    return [data]
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration: rewrite the BAM using the covariates.

    After the BAM is written, qualities are optionally binned with
    ``cram.illumina_qual_bin``. That happens when the ``quality_bin``
    algorithm setting is ``True``, ``"postrecal"`` or a list containing
    ``"postrecal"``, and ``has_aligned_reads`` is truthy for the output.

    Args:
        data: Sample dictionary; reads ``config``, ``work_bam``, ``name``,
            ``prep_recal`` and ``sam_ref``.
        region: Region to process. The literal ``"nochr"`` routes the input
            through ``write_nochr_reads`` instead of ``_run_recal_bam``.
        out_file: Output path. Defaults to the input BAM path with its
            extension replaced by ``-gatkrecal.bam``.

    Returns:
        A one-element list holding ``data`` with ``work_bam`` pointing at the
        recalibrated BAM.
    """
    config = data["config"]
    if out_file is None:
        out_file = "%s-gatkrecal.bam" % os.path.splitext(data["work_bam"])[0]
    log_args = (data["name"], out_file)
    logger.info("Writing recalibrated BAM for %s to %s" % log_args)

    if region == "nochr":
        result_bam = write_nochr_reads(data["work_bam"], out_file, data["config"])
    else:
        result_bam = _run_recal_bam(
            data["work_bam"],
            data["prep_recal"],
            region,
            data["sam_ref"],
            out_file,
            config,
        )

    setting = config["algorithm"].get("quality_bin", None)
    listed_postrecal = isinstance(setting, list) and "postrecal" in setting
    requested = setting is True or setting == "postrecal" or listed_postrecal
    if requested and has_aligned_reads(result_bam):
        binned_bam = cram.illumina_qual_bin(
            result_bam, data["sam_ref"], os.path.dirname(result_bam), config)
        # Swap the files: the unbinned BAM gets a ".binned" suffix and the
        # binned BAM takes over the original path.
        shutil.move(result_bam, result_bam + ".binned")
        shutil.move(binned_bam, result_bam)
        utils.save_diskspace(
            result_bam + ".binned", "Quality binned to %s" % result_bam, config)

    data["work_bam"] = result_bam
    return [data]