Exemple #1
0
def _convert_bam_to_fastq(in_file, work_dir, data, dirs, config):
    """Produce FASTQ output files from a BAM input file.

    Output is written to a ``fastq_convert`` directory under ``work_dir``.
    If the ``quality_bin`` algorithm setting equals "prealignment" (or is a
    list containing it), the input is first run through
    ``cram.illumina_qual_bin`` and the binned file is converted instead.

    Returns a two-item list ``[out1, out2]``. ``out2`` is None when the input
    is not paired or when the second FASTQ file has zero size.
    """
    fastq_dir = safe_makedir(os.path.join(work_dir, "fastq_convert"))

    binning = config["algorithm"].get("quality_bin")
    wants_prealign_bin = binning == "prealignment" or (
        isinstance(binning, list) and "prealignment" in binning)
    if wants_prealign_bin:
        bin_dir = safe_makedir(os.path.join(fastq_dir, "qualbin"))
        in_file = cram.illumina_qual_bin(in_file, data["sam_ref"], bin_dir,
                                         config)

    # Output names derive from the (possibly binned) input file name.
    stem = os.path.splitext(os.path.basename(in_file))[0]
    out1 = os.path.join(fastq_dir, "{0}_{1}.fastq".format(stem, "1"))
    out2 = None
    if bam.is_paired(in_file):
        out2 = os.path.join(fastq_dir, "{0}_{1}.fastq".format(stem, "2"))

    # Skip the conversion when the first output is already present.
    if not file_exists(out1):
        runner = broad.runner_from_config(config)
        runner.run_fn("picard_bam_to_fastq", in_file, out1, out2)

    # An empty second file is reported as "no second file".
    has_second = bool(out2) and os.path.getsize(out2) != 0
    return [out1, out2 if has_second else None]
Exemple #2
0
def _convert_bam_to_fastq(in_file, work_dir, data, dirs, config):
    """Turn a BAM input file into one or two FASTQ files.

    Files are placed in ``<work_dir>/fastq_convert``. When the ``quality_bin``
    algorithm option is "prealignment", or a list that includes it, the BAM
    is passed through ``cram.illumina_qual_bin`` before conversion.

    Returns ``[out1, out2]`` where ``out2`` is None for non-paired input or
    when the second FASTQ file is empty.
    """
    convert_dir = safe_makedir(os.path.join(work_dir, "fastq_convert"))

    bin_setting = config["algorithm"].get("quality_bin")
    in_bin_list = isinstance(bin_setting, list) and "prealignment" in bin_setting
    if bin_setting == "prealignment" or in_bin_list:
        qualbin_dir = safe_makedir(os.path.join(convert_dir, "qualbin"))
        in_file = cram.illumina_qual_bin(in_file, data["sam_ref"], qualbin_dir, config)

    # Name outputs after the file actually being converted.
    base_name = os.path.splitext(os.path.basename(in_file))[0]
    first, second = (os.path.join(convert_dir, "{0}_{1}.fastq".format(base_name, mate))
                     for mate in ("1", "2"))
    out1 = first
    out2 = second if bam.is_paired(in_file) else None

    # Only run the conversion if the first output is not there yet.
    if not file_exists(out1):
        picard = broad.runner_from_path("picard", config)
        picard.run_fn("picard_bam_to_fastq", in_file, out1, out2)

    # Drop a zero-size second file from the result.
    if out2 and os.path.getsize(out2) == 0:
        return [out1, None]
    return [out1, out2]
Exemple #3
0
def write_recal_bam(data, region=None, out_file=None):
    """Write the recalibrated BAM (step 2 of GATK recalibration).

    ``out_file`` defaults to the ``work_bam`` path with its extension replaced
    by ``-gatkrecal.bam``. The "nochr" region is handled by
    ``write_nochr_reads``; any other region goes through ``_run_recal_bam``.

    When ``quality_bin`` is True, "postrecal", or a list containing
    "postrecal", and the output has aligned reads, the output is quality
    binned and the binned file takes the place of the original output.

    Returns a single-item list holding ``data`` with ``work_bam`` updated.
    """
    settings = data["config"]
    if out_file is None:
        work_base = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % work_base
    message = "Writing recalibrated BAM for %s to %s" % (data["name"], out_file)
    logger.info(message)

    if region != "nochr":
        recal_bam = _run_recal_bam(data["work_bam"], data["prep_recal"], region,
                                   data["sam_ref"], out_file, settings)
    else:
        recal_bam = write_nochr_reads(data["work_bam"], out_file)

    binning = settings["algorithm"].get("quality_bin", None)
    bin_requested = (binning is True
                     or binning == "postrecal"
                     or (isinstance(binning, list) and "postrecal" in binning))
    if bin_requested and has_aligned_reads(recal_bam):
        binned = cram.illumina_qual_bin(recal_bam, data["sam_ref"],
                                        os.path.dirname(recal_bam), settings)
        # Set the unbinned output aside, then put the binned file in its place.
        set_aside = recal_bam + ".binned"
        shutil.move(recal_bam, set_aside)
        shutil.move(binned, recal_bam)
        utils.save_diskspace(set_aside, "Quality binned to %s" % recal_bam,
                             settings)

    data["work_bam"] = recal_bam
    return [data]
Exemple #4
0
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, config):
    """Align reads from a BAM input using the aligner's BAM alignment function.

    The function to run is the ``bam_align_fn`` attribute of
    ``_tools[aligner]``. If the ``quality_bin`` algorithm setting is
    "prealignment" (or a list containing it), the input is first quality
    binned with ``cram.illumina_qual_bin`` into ``<align_dir>/qualbin``.

    Returns whatever the aligner's ``bam_align_fn`` returns.

    Raises:
        NotImplementedError: if the aligner has no BAM alignment function
            (``bam_align_fn`` is None).
    """
    # Resolve the aligner first so an unsupported aligner fails with a clear
    # error before any quality-binning work is done.
    align_fn = _tools[aligner].bam_align_fn
    if align_fn is None:
        raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)

    qual_bin_method = config["algorithm"].get("quality_bin")
    if (qual_bin_method == "prealignment" or
            (isinstance(qual_bin_method, list) and "prealignment" in qual_bin_method)):
        out_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, out_dir, config)
    return align_fn(fastq1, align_ref, names, align_dir, config)
Exemple #5
0
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, config):
    """Run the aligner's BAM alignment function on a BAM input.

    With a ``quality_bin`` setting of "prealignment" (alone or inside a
    list), the input is quality binned into ``<align_dir>/qualbin`` first.

    Raises NotImplementedError when ``_tools[aligner].bam_align_fn`` is None.
    """
    binning = config["algorithm"].get("quality_bin")
    listed = isinstance(binning, list) and "prealignment" in binning
    if binning == "prealignment" or listed:
        bin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, bin_dir, config)

    bam_aligner = _tools[aligner].bam_align_fn
    if bam_aligner is not None:
        return bam_aligner(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
Exemple #6
0
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, data):
    """Run the aligner's BAM alignment function on a BAM input.

    Split alignments (``data["align_split"]``) are rejected by an assertion.
    The configuration is read from ``data["config"]``; a ``quality_bin``
    setting of "prealignment" (alone or inside a list) quality bins the
    input into ``<align_dir>/qualbin`` before alignment.

    Raises NotImplementedError when ``TOOLS[aligner].bam_align_fn`` is None.
    """
    assert not data.get("align_split"), "Do not handle split alignments with BAM yet"
    config = data["config"]

    bin_setting = config["algorithm"].get("quality_bin")
    prealign_bin = bin_setting == "prealignment" or (
        isinstance(bin_setting, list) and "prealignment" in bin_setting)
    if prealign_bin:
        qualbin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, qualbin_dir, config)

    run_alignment = TOOLS[aligner].bam_align_fn
    if run_alignment is not None:
        return run_alignment(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
Exemple #7
0
def split_read_files(fastq1, fastq2, item, split_size, out_dir, dirs, config):
    """Split input reads for parallel processing, based on the input type.

    A lone ``.bam`` input (``fastq2`` is None) is split with
    ``split_bam_file``, after optional quality binning when ``quality_bin``
    is "prealignment" or a list containing it. Every other input is handed
    to ``split_fastq_files``.
    """
    single_bam_input = fastq1.endswith(".bam") and fastq2 is None
    if not single_bam_input:
        return split_fastq_files(fastq1, fastq2, split_size, out_dir, config)

    binning = config["algorithm"].get("quality_bin")
    listed = isinstance(binning, list) and "prealignment" in binning
    if binning == "prealignment" or listed:
        bin_dir = utils.safe_makedir(os.path.join(out_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, item["sam_ref"], bin_dir, config)
    return split_bam_file(fastq1, split_size, out_dir, config)
Exemple #8
0
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir,
                    config):
    """Align a BAM input with the aligner's ``bam_align_fn``.

    If ``quality_bin`` is "prealignment" or a list containing it, the input
    is quality binned into ``<align_dir>/qualbin`` before alignment.

    Raises NotImplementedError when ``TOOLS[aligner].bam_align_fn`` is None.
    """
    bin_mode = config["algorithm"].get("quality_bin")
    bin_before_align = bin_mode == "prealignment" or (
        isinstance(bin_mode, list) and "prealignment" in bin_mode)
    if bin_before_align:
        qualbin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, qualbin_dir, config)

    bam_aligner = TOOLS[aligner].bam_align_fn
    if bam_aligner is not None:
        return bam_aligner(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" %
                              aligner)
Exemple #9
0
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, data):
    """Align a BAM input and return the sample data with ``work_bam`` set.

    Split alignments (``data["align_split"]``) are rejected by an assertion.
    A ``quality_bin`` setting of "prealignment" (alone or inside a list)
    quality bins the input into ``<align_dir>/qualbin`` first.

    If the aligner's ``bam_align_fn`` returns a dict, that dict is returned
    (it must contain "work_bam"). Otherwise the returned value is stored in
    ``data["work_bam"]`` and ``data`` is returned.

    Raises NotImplementedError when ``TOOLS[aligner].bam_align_fn`` is None.
    """
    assert not data.get("align_split"), "Do not handle split alignments with BAM yet"
    config = data["config"]

    binning = config["algorithm"].get("quality_bin")
    listed = isinstance(binning, list) and "prealignment" in binning
    if binning == "prealignment" or listed:
        bin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, bin_dir, config)

    bam_aligner = TOOLS[aligner].bam_align_fn
    if bam_aligner is None:
        raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)

    result = bam_aligner(fastq1, align_ref, names, align_dir, data)
    if not isinstance(result, dict):
        # A non-dict result is recorded as the work BAM on the sample data.
        data["work_bam"] = result
        return data
    assert "work_bam" in result
    return result
def write_recal_bam(data, region=None, out_file=None):
    """Write the recalibrated BAM (step 2 of GATK recalibration).

    ``out_file`` defaults to the ``work_bam`` path with its extension replaced
    by ``-gatkrecal.bam``. The "nochr" region is handled by
    ``write_nochr_reads``; any other region goes through ``_run_recal_bam``.

    When ``quality_bin`` is True, "postrecal", or a list containing
    "postrecal", and the output has aligned reads, the output is quality
    binned and the binned file takes the place of the original output.

    Returns a single-item list holding ``data`` with ``work_bam`` updated.
    """
    config = data["config"]
    if out_file is None:
        work_base = os.path.splitext(data["work_bam"])[0]
        out_file = "%s-gatkrecal.bam" % work_base
    message = "Writing recalibrated BAM for %s to %s" % (data["name"], out_file)
    logger.info(message)

    if region != "nochr":
        recal_bam = _run_recal_bam(data["work_bam"], data["prep_recal"],
                                   region, data["sam_ref"], out_file, config)
    else:
        recal_bam = write_nochr_reads(data["work_bam"], out_file, config)

    binning = config["algorithm"].get("quality_bin", None)
    bin_requested = (binning is True
                     or binning == "postrecal"
                     or (isinstance(binning, list) and "postrecal" in binning))
    if bin_requested and has_aligned_reads(recal_bam):
        binned = cram.illumina_qual_bin(recal_bam, data["sam_ref"],
                                        os.path.dirname(recal_bam), config)
        # Set the unbinned output aside, then put the binned file in its place.
        set_aside = recal_bam + ".binned"
        shutil.move(recal_bam, set_aside)
        shutil.move(binned, recal_bam)
        utils.save_diskspace(set_aside, "Quality binned to %s" % recal_bam,
                             config)

    data["work_bam"] = recal_bam
    return [data]