Exemple #1
0
def piped_bamprep(data, region=None, out_file=None):
    """Perform full BAM preparation using pipes to avoid intermediate disk IO.

    Handles recalibration and realignment of original BAMs.

    ``data["region"]`` is always set to ``region``. When ``_need_prep(data)``
    is false nothing else happens. Otherwise the first element of ``region``
    selects how the prepared BAM is produced:

    * ``"nochrom"``    -- ``shared.write_nochr_reads``
    * ``"noanalysis"`` -- ``shared.write_noanalysis_reads`` using ``region[1]``
    * anything else    -- ``_piped_bamprep_region`` writes ``out_file`` inside
      a ``tx_tmpdir`` temporary directory, unless ``out_file`` already exists

    The prepared BAM is indexed with ``bam.index`` and stored in
    ``data["work_bam"]``.

    Returns a single-element list holding the (mutated) ``data`` dictionary.
    """
    data["region"] = region
    # Guard clause: samples that need no preparation pass straight through.
    if not _need_prep(data):
        return [data]

    utils.safe_makedir(os.path.dirname(out_file))
    region_kind = region[0]
    if region_kind == "nochrom":
        prepared = shared.write_nochr_reads(
            data["work_bam"], out_file, data["config"])
    elif region_kind == "noanalysis":
        prepared = shared.write_noanalysis_reads(
            data["work_bam"], region[1], out_file, data["config"])
    else:
        # Skip the piped preparation when a previous run already wrote it.
        already_written = utils.file_exists(out_file)
        if not already_written:
            with tx_tmpdir(data) as scratch_dir:
                _piped_bamprep_region(data, region, out_file, scratch_dir)
        prepared = out_file

    bam.index(prepared, data["config"])
    data["work_bam"] = prepared
    return [data]
Exemple #2
0
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    When ``out_file`` is None it defaults to the ``data["work_bam"]`` path
    with its extension replaced by ``-gatkrecal.bam``. A ``region`` equal to
    ``"nochr"`` is handled by ``write_nochr_reads``; every other region goes
    through ``_run_recal_bam`` with ``data["prep_recal"]``.

    If the ``quality_bin`` algorithm setting is True, ``"postrecal"`` or a
    list containing ``"postrecal"``, and the output has aligned reads, the
    output is replaced by the result of ``cram.illumina_qual_bin``.

    Returns a single-element list holding ``data`` with ``data["work_bam"]``
    pointing at the recalibrated BAM.
    """
    config = data["config"]
    if out_file is None:
        bam_base = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % bam_base
    message = "Writing recalibrated BAM for %s to %s" % (data["name"],
                                                         out_file)
    logger.info(message)

    if region == "nochr":
        out_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        out_bam = _run_recal_bam(
            data["work_bam"], data["prep_recal"], region, data["sam_ref"],
            out_file, config)

    qual_bin = config["algorithm"].get("quality_bin", None)
    bin_after_recal = (
        qual_bin is True
        or qual_bin == "postrecal"
        or (isinstance(qual_bin, list) and "postrecal" in qual_bin)
    )
    # has_aligned_reads is only consulted once binning has been requested.
    if bin_after_recal and has_aligned_reads(out_bam):
        binned_bam = cram.illumina_qual_bin(
            out_bam, data["sam_ref"], os.path.dirname(out_bam), config)
        # Park the pre-binning file under a ".binned" suffix, then move the
        # binned result into the expected output location.
        parked_bam = out_bam + ".binned"
        shutil.move(out_bam, parked_bam)
        shutil.move(binned_bam, out_bam)
        utils.save_diskspace(parked_bam, "Quality binned to %s" % out_bam,
                             config)

    data["work_bam"] = out_bam
    return [data]
Exemple #3
0
def realign_sample(data, region=None, out_file=None):
    """Realign sample BAM file at indels.

    The ``realign`` algorithm setting (default True) picks the realigner:
    True means ``"gatk"``, a falsy value disables realignment, and any other
    value is looked up in ``_realign_approaches``.

    A ``region`` equal to ``"nochr"`` is handled by ``write_nochr_reads``
    instead of the realigner. When ``region`` is None the previous
    ``data["work_bam"]`` is handed to ``save_diskspace``.

    Returns a single-element list holding ``data``; ``data["work_bam"]`` is
    updated only when a realignment function was selected.
    """
    algorithm = data["config"]["algorithm"]
    realigner = algorithm.get("realign", True)
    if realigner is True:
        realigner = "gatk"
    realign_fn = _realign_approaches[realigner] if realigner else None
    # Nothing to do when realignment is disabled.
    if not realign_fn:
        return [data]

    input_name = os.path.basename(data["work_bam"])
    logger.info("Realigning %s with %s: %s %s" %
                (data["name"], realigner, input_name, region))
    sam_ref = data["sam_ref"]
    config = data["config"]
    if region == "nochr":
        realign_bam = write_nochr_reads(data["work_bam"], out_file,
                                        data["config"])
    else:
        dbsnp = data["genome_resources"]["variation"]["dbsnp"]
        realign_bam = realign_fn(data["work_bam"], sam_ref, config, dbsnp,
                                 region, out_file)
    if region is None:
        save_diskspace(data["work_bam"], "Realigned to %s" % realign_bam,
                       config)
    data["work_bam"] = realign_bam
    return [data]
Exemple #4
0
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    When ``out_file`` is None it defaults to the ``data["work_bam"]`` path
    with its extension replaced by ``-gatkrecal.bam``. A ``region`` equal to
    ``"nochr"`` is handled by ``write_nochr_reads``; every other region goes
    through ``_run_recal_bam`` with ``data["prep_recal"]``.

    Returns a single-element list holding ``data`` with ``data["work_bam"]``
    pointing at the file that was written.
    """
    config = data["config"]
    if out_file is None:
        stem = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % stem
    sample_name = data["name"]
    logger.info("Writing recalibrated BAM for %s to %s" % (sample_name, out_file))
    if region == "nochr":
        recal_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        recal_bam = _run_recal_bam(
            data["work_bam"], data["prep_recal"], region, data["sam_ref"],
            out_file, config)
    data["work_bam"] = recal_bam
    return [data]
Exemple #5
0
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    ``out_file`` defaults to the ``data["work_bam"]`` path with its extension
    swapped for ``-gatkrecal.bam``. Reads for the ``"nochr"`` region are
    written by ``write_nochr_reads``; all other regions are recalibrated by
    ``_run_recal_bam`` using ``data["prep_recal"]``.

    Returns ``[data]`` after pointing ``data["work_bam"]`` at the new file.
    """
    config = data["config"]
    if out_file is None:
        input_base, _ = os.path.splitext(data["work_bam"])
        out_file = "%s-gatkrecal.bam" % input_base
    log_line = "Writing recalibrated BAM for %s to %s" % (data["name"],
                                                          out_file)
    logger.info(log_line)
    if region == "nochr":
        written_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        written_bam = _run_recal_bam(data["work_bam"], data["prep_recal"],
                                     region, data["sam_ref"], out_file,
                                     config)
    data["work_bam"] = written_bam
    return [data]
Exemple #6
0
def realign_sample(data, region=None, out_file=None):
    """Realign sample BAM file at indels.

    Realignment only runs when ``data["config"]["algorithm"]["snpcall"]`` is
    truthy; otherwise ``data`` is returned untouched and nothing is logged.

    Args:
        data: Sample dictionary. Reads ``config``, and when realigning also
            ``name``, ``work_bam`` and ``sam_ref``.
        region: Region to realign. ``"nochr"`` is handled by
            ``write_nochr_reads`` instead of ``gatk_realigner``. When None,
            the previous ``work_bam`` is passed to ``save_diskspace``.
        out_file: Output path handed to the writer/realigner.

    Returns:
        A single-element list holding ``data``; ``data["work_bam"]`` points at
        the realigned BAM when realignment ran.
    """
    config = data["config"]
    if not config["algorithm"]["snpcall"]:
        return [data]

    # Log only once we know realignment will actually run, so the log never
    # reports a realignment that was skipped.
    logger.info("Realigning %s with GATK: %s %s" % (data["name"], os.path.basename(data["work_bam"]), region))
    sam_ref = data["sam_ref"]
    if region == "nochr":
        realign_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        realign_bam = gatk_realigner(
            data["work_bam"], sam_ref, config, configured_ref_file("dbsnp", config, sam_ref), region, out_file
        )
    if region is None:
        save_diskspace(data["work_bam"], "Realigned to %s" % realign_bam, config)
    data["work_bam"] = realign_bam
    return [data]
Exemple #7
0
def piped_bamprep(data, region=None, out_file=None):
    """Perform full BAM preparation using pipes to avoid intermediate disk IO.

    Handles de-duplication, recalibration and realignment of original BAMs.

    The first element of ``region`` selects how the prepared BAM is made:
    ``"nochrom"`` uses ``shared.write_nochr_reads``, ``"noanalysis"`` uses
    ``shared.write_noanalysis_reads`` with ``region[1]``, and anything else
    runs ``_piped_bamprep_region`` inside a ``utils.curdir_tmpdir`` directory
    unless ``out_file`` already exists.

    Returns a single-element list holding ``data`` with ``work_bam`` and
    ``regions["current"]`` updated.
    """
    region_kind = region[0]
    if region_kind == "nochrom":
        result_bam = shared.write_nochr_reads(data["work_bam"], out_file)
    elif region_kind == "noanalysis":
        result_bam = shared.write_noanalysis_reads(
            data["work_bam"], region[1], out_file)
    else:
        result_bam = out_file
        # An existing output means a previous run already prepared it.
        if not utils.file_exists(result_bam):
            with utils.curdir_tmpdir() as work_dir:
                _piped_bamprep_region(data, region, result_bam, work_dir)
    data["work_bam"] = result_bam
    data["regions"]["current"] = region
    return [data]
Exemple #8
0
def piped_bamprep(data, region=None, out_file=None):
    """Perform full BAM preparation using pipes to avoid intermediate disk IO.

    Handles de-duplication, recalibration and realignment of original BAMs.

    The directory of ``out_file`` is created first. The first element of
    ``region`` then selects how the prepared BAM is made: ``"nochrom"`` uses
    ``shared.write_nochr_reads``, ``"noanalysis"`` uses
    ``shared.write_noanalysis_reads`` with ``region[1]``, and anything else
    runs ``_piped_bamprep_region`` inside a ``utils.curdir_tmpdir`` directory
    unless ``out_file`` already exists. The result is indexed through the
    broad runner's ``picard_index`` function.

    Returns a single-element list holding ``data`` with ``work_bam`` and
    ``region`` updated.
    """
    utils.safe_makedir(os.path.dirname(out_file))
    region_kind = region[0]
    if region_kind == "nochrom":
        prepared = shared.write_nochr_reads(data["work_bam"], out_file)
    elif region_kind == "noanalysis":
        prepared = shared.write_noanalysis_reads(
            data["work_bam"], region[1], out_file)
    else:
        prepared = out_file
        # An existing output means a previous run already prepared it.
        if not utils.file_exists(prepared):
            with utils.curdir_tmpdir() as scratch_dir:
                _piped_bamprep_region(data, region, prepared, scratch_dir)
    broad.runner_from_config(data["config"]).run_fn("picard_index", prepared)
    data["work_bam"] = prepared
    data["region"] = region
    return [data]
Exemple #9
0
def realign_sample(data, region=None, out_file=None):
    """Realign sample BAM file at indels.

    Realignment only runs when ``data["config"]["algorithm"]["snpcall"]`` is
    truthy; otherwise ``data`` is returned untouched and nothing is logged.

    Args:
        data: Sample dictionary. Reads ``config``, and when realigning also
            ``name``, ``work_bam`` and ``sam_ref``.
        region: Region to realign. ``"nochr"`` is handled by
            ``write_nochr_reads`` instead of ``gatk_realigner``. When None,
            the previous ``work_bam`` is passed to ``save_diskspace``.
        out_file: Output path handed to the writer/realigner.

    Returns:
        A single-element list holding ``data``; ``data["work_bam"]`` points at
        the realigned BAM when realignment ran.
    """
    config = data["config"]
    if not config["algorithm"]["snpcall"]:
        return [data]

    # Log only once we know realignment will actually run, so the log never
    # reports a realignment that was skipped.
    logger.info("Realigning %s with GATK: %s %s" %
                (data["name"], os.path.basename(data["work_bam"]), region))
    sam_ref = data["sam_ref"]
    if region == "nochr":
        realign_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        realign_bam = gatk_realigner(
            data["work_bam"], sam_ref, config,
            configured_ref_file("dbsnp", config, sam_ref), region,
            out_file)
    if region is None:
        save_diskspace(data["work_bam"], "Realigned to %s" % realign_bam,
                       config)
    data["work_bam"] = realign_bam
    return [data]
Exemple #10
0
def piped_bamprep(data, region=None, out_file=None):
    """Perform full BAM preparation using pipes to avoid intermediate disk IO.

    Handles de-duplication, recalibration and realignment of original BAMs.

    After creating the directory for ``out_file``, dispatches on
    ``region[0]``: ``"nochrom"`` goes to ``shared.write_nochr_reads``,
    ``"noanalysis"`` goes to ``shared.write_noanalysis_reads`` (with
    ``region[1]``), and any other region is prepared by
    ``_piped_bamprep_region`` in a ``utils.curdir_tmpdir`` directory when
    ``out_file`` is not already present. The prepared BAM is then indexed via
    the broad runner's ``picard_index`` function.

    Returns ``[data]`` with ``data["work_bam"]`` and ``data["region"]`` set.
    """
    utils.safe_makedir(os.path.dirname(out_file))
    selector = region[0]
    if selector == "nochrom":
        ready_bam = shared.write_nochr_reads(data["work_bam"], out_file)
    elif selector == "noanalysis":
        ready_bam = shared.write_noanalysis_reads(data["work_bam"], region[1],
                                                  out_file)
    else:
        output_exists = utils.file_exists(out_file)
        if not output_exists:
            with utils.curdir_tmpdir() as tmp_dir:
                _piped_bamprep_region(data, region, out_file, tmp_dir)
        ready_bam = out_file
    indexer = broad.runner_from_config(data["config"])
    indexer.run_fn("picard_index", ready_bam)
    data["work_bam"] = ready_bam
    data["region"] = region
    return [data]
Exemple #11
0
def piped_bamprep(data, region=None, out_file=None):
    """Perform full BAM preparation using pipes to avoid intermediate disk IO.

    Handles recalibration and realignment of original BAMs.

    ``data["region"]`` is set to ``region`` up front. If ``_need_prep(data)``
    is false the sample is returned as is. Otherwise ``region[0]`` picks the
    preparation route: ``"nochrom"`` calls ``shared.write_nochr_reads``,
    ``"noanalysis"`` calls ``shared.write_noanalysis_reads`` with
    ``region[1]``, and any other region is prepared by
    ``_piped_bamprep_region`` in a ``tx_tmpdir`` directory when ``out_file``
    does not exist yet. The result is indexed with ``bam.index``.

    Returns ``[data]``; ``data["work_bam"]`` is updated when preparation ran.
    """
    data["region"] = region
    # Guard clause: samples that need no preparation pass straight through.
    if not _need_prep(data):
        return [data]

    utils.safe_makedir(os.path.dirname(out_file))
    selector = region[0]
    if selector == "nochrom":
        ready_bam = shared.write_nochr_reads(data["work_bam"], out_file,
                                             data["config"])
    elif selector == "noanalysis":
        ready_bam = shared.write_noanalysis_reads(data["work_bam"], region[1],
                                                  out_file, data["config"])
    else:
        ready_bam = out_file
        # An existing output means a previous run already prepared it.
        if not utils.file_exists(ready_bam):
            with tx_tmpdir(data) as work_dir:
                _piped_bamprep_region(data, region, ready_bam, work_dir)

    bam.index(ready_bam, data["config"])
    data["work_bam"] = ready_bam
    return [data]
Exemple #12
0
def realign_sample(data, region=None, out_file=None):
    """Realign sample BAM file at indels.

    The realigner comes from the ``realign`` algorithm setting, which
    defaults to True (meaning ``"gatk"``). A falsy setting turns realignment
    off; any other value is used as a key into ``_realign_approaches``.

    For the ``"nochr"`` region ``write_nochr_reads`` is called in place of the
    realigner. When ``region`` is None the previous ``data["work_bam"]`` is
    passed to ``save_diskspace``.

    Returns ``[data]``; ``data["work_bam"]`` changes only when a realignment
    function was selected.
    """
    realigner = data["config"]["algorithm"].get("realign", True)
    if realigner is True:
        realigner = "gatk"
    realign_fn = None
    if realigner:
        realign_fn = _realign_approaches[realigner]
    # Nothing to do when realignment is disabled.
    if not realign_fn:
        return [data]

    bam_name = os.path.basename(data["work_bam"])
    logger.info("Realigning %s with %s: %s %s" % (data["name"], realigner,
                                                  bam_name, region))
    sam_ref = data["sam_ref"]
    config = data["config"]
    if region == "nochr":
        realign_bam = write_nochr_reads(data["work_bam"], out_file)
    else:
        dbsnp = data["genome_resources"]["variation"]["dbsnp"]
        realign_bam = realign_fn(data["work_bam"], sam_ref, config, dbsnp,
                                 region, out_file)
    if region is None:
        save_diskspace(data["work_bam"], "Realigned to %s" % realign_bam,
                       config)
    data["work_bam"] = realign_bam
    return [data]
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    ``out_file`` defaults to the ``data["work_bam"]`` path with its extension
    swapped for ``-gatkrecal.bam``. Reads for the ``"nochr"`` region are
    written by ``write_nochr_reads``; all other regions are recalibrated by
    ``_run_recal_bam`` using ``data["prep_recal"]``.

    When the ``quality_bin`` algorithm setting is True, ``"postrecal"`` or a
    list containing ``"postrecal"``, and the output has aligned reads, the
    output is replaced by the result of ``cram.illumina_qual_bin``.

    Returns ``[data]`` after pointing ``data["work_bam"]`` at the output BAM.
    """
    config = data["config"]
    if out_file is None:
        input_base, _ = os.path.splitext(data["work_bam"])
        out_file = "%s-gatkrecal.bam" % input_base
    log_line = "Writing recalibrated BAM for %s to %s" % (data["name"], out_file)
    logger.info(log_line)

    if region == "nochr":
        out_bam = write_nochr_reads(data["work_bam"], out_file, data["config"])
    else:
        out_bam = _run_recal_bam(
            data["work_bam"], data["prep_recal"], region, data["sam_ref"],
            out_file, config)

    qual_bin = config["algorithm"].get("quality_bin", None)
    wants_postrecal_binning = (
        qual_bin is True
        or qual_bin == "postrecal"
        or (isinstance(qual_bin, list) and "postrecal" in qual_bin)
    )
    # has_aligned_reads is only consulted once binning has been requested.
    if wants_postrecal_binning and has_aligned_reads(out_bam):
        binned_bam = cram.illumina_qual_bin(
            out_bam, data["sam_ref"], os.path.dirname(out_bam), config)
        # Park the pre-binning file under a ".binned" suffix, then move the
        # binned result into the expected output location.
        parked_bam = out_bam + ".binned"
        shutil.move(out_bam, parked_bam)
        shutil.move(binned_bam, out_bam)
        utils.save_diskspace(parked_bam, "Quality binned to %s" % out_bam,
                             config)

    data["work_bam"] = out_bam
    return [data]