def _convert_bam_to_fastq(in_file, work_dir, data, dirs, config):
    """Convert a BAM input file into FASTQ files.

    Outputs are written to a ``fastq_convert`` directory under `work_dir` and
    are named ``<input basename>_1.fastq`` and ``<input basename>_2.fastq``.
    When the ``quality_bin`` algorithm setting is, or contains,
    "prealignment", the input is first run through ``cram.illumina_qual_bin``
    and the binned file is converted instead.

    `dirs` is accepted but not used here.

    Returns a two item list ``[fastq1, fastq2]``; the second item is None when
    the input is not paired or when the second FASTQ file is empty.
    """
    fastq_dir = safe_makedir(os.path.join(work_dir, "fastq_convert"))

    binning = config["algorithm"].get("quality_bin")
    bin_first = (binning == "prealignment" or
                 (isinstance(binning, list) and "prealignment" in binning))
    if bin_first:
        bin_dir = safe_makedir(os.path.join(fastq_dir, "qualbin"))
        in_file = cram.illumina_qual_bin(in_file, data["sam_ref"], bin_dir, config)

    # Output names derive from the (possibly quality binned) input file.
    base_name = os.path.splitext(os.path.basename(in_file))[0]
    first_fq = os.path.join(fastq_dir, "{0}_{1}.fastq".format(base_name, "1"))
    second_fq = None
    if bam.is_paired(in_file):
        second_fq = os.path.join(fastq_dir, "{0}_{1}.fastq".format(base_name, "2"))

    # An existing first FASTQ file means the conversion is not re-run.
    if not file_exists(first_fq):
        picard = broad.runner_from_config(config)
        picard.run_fn("picard_bam_to_fastq", in_file, first_fq, second_fq)

    # Report an empty second file as "no second file".
    if second_fq and os.path.getsize(second_fq) == 0:
        second_fq = None
    return [first_fq, second_fq]
def _convert_bam_to_fastq(in_file, work_dir, data, dirs, config):
    """Convert a BAM input file into FASTQ files using the Picard runner.

    FASTQ output goes to ``<work_dir>/fastq_convert`` as
    ``<input basename>_1.fastq`` and, for paired input,
    ``<input basename>_2.fastq``. If the ``quality_bin`` algorithm setting
    equals "prealignment", or is a list containing it, the input is quality
    binned with ``cram.illumina_qual_bin`` before conversion.

    `dirs` is accepted but not used here.

    Returns ``[fastq1, fastq2]``, where ``fastq2`` is None for input that is
    not paired or when the second FASTQ file has zero size.
    """
    convert_dir = safe_makedir(os.path.join(work_dir, "fastq_convert"))

    bin_setting = config["algorithm"].get("quality_bin")
    if (bin_setting == "prealignment" or
            (isinstance(bin_setting, list) and "prealignment" in bin_setting)):
        qualbin_dir = safe_makedir(os.path.join(convert_dir, "qualbin"))
        in_file = cram.illumina_qual_bin(in_file, data["sam_ref"], qualbin_dir, config)

    # File names are built from whichever input is current after binning.
    stem = os.path.splitext(os.path.basename(in_file))[0]
    candidates = []
    for read_num in ["1", "2"]:
        candidates.append(os.path.join(convert_dir, "{0}_{1}.fastq".format(stem, read_num)))

    out1 = candidates[0]
    out2 = candidates[1] if bam.is_paired(in_file) else None

    # Conversion is skipped when the first output is already present.
    if not file_exists(out1):
        runner = broad.runner_from_path("picard", config)
        runner.run_fn("picard_bam_to_fastq", in_file, out1, out2)

    # A zero-size second file is reported as None.
    if out2 and os.path.getsize(out2) == 0:
        out2 = None
    return [out1, out2]
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    `out_file` defaults to the current ``work_bam`` path with its extension
    replaced by ``-gatkrecal.bam``. A `region` of "nochr" is handled by
    ``write_nochr_reads``; any other value is passed to ``_run_recal_bam``.

    When the ``quality_bin`` algorithm setting is True, "postrecal", or a list
    containing "postrecal", and the output has aligned reads, the output is
    quality binned and the binned file takes over the output path.

    Returns a single item list holding `data`, with ``work_bam`` pointing at
    the recalibrated file.
    """
    config = data["config"]
    if out_file is None:
        out_file = "%s-gatkrecal.bam" % os.path.splitext(data["work_bam"])[0]
    logger.info("Writing recalibrated BAM for %s to %s" % (data["name"], out_file))

    if region != "nochr":
        out_bam = _run_recal_bam(data["work_bam"], data["prep_recal"], region,
                                 data["sam_ref"], out_file, config)
    else:
        out_bam = write_nochr_reads(data["work_bam"], out_file)

    bin_setting = config["algorithm"].get("quality_bin", None)
    bin_requested = (bin_setting is True or bin_setting == "postrecal" or
                     (isinstance(bin_setting, list) and "postrecal" in bin_setting))
    # has_aligned_reads is only consulted when binning was requested.
    if bin_requested and has_aligned_reads(out_bam):
        binned_bam = cram.illumina_qual_bin(out_bam, data["sam_ref"],
                                            os.path.dirname(out_bam), config)
        # Set the unbinned file aside, then put the binned file in its place.
        set_aside = out_bam + ".binned"
        shutil.move(out_bam, set_aside)
        shutil.move(binned_bam, out_bam)
        utils.save_diskspace(set_aside, "Quality binned to %s" % out_bam, config)

    data["work_bam"] = out_bam
    return [data]
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, config):
    """Align reads from a BAM input using the BAM aligner registered for `aligner`.

    If the ``quality_bin`` algorithm setting is "prealignment", or a list
    containing it, the input is first quality binned with
    ``cram.illumina_qual_bin`` into a ``qualbin`` directory under `align_dir`.

    NOTE(review): despite its name, `fastq1` is presumably the path to the
    input BAM file -- confirm against the caller.

    Returns whatever the selected ``bam_align_fn`` returns.

    Raises NotImplementedError when the aligner has no BAM alignment function.
    """
    qual_bin_method = config["algorithm"].get("quality_bin")
    if (qual_bin_method == "prealignment" or
            (isinstance(qual_bin_method, list) and "prealignment" in qual_bin_method)):
        out_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, out_dir, config)
    align_fn = _tools[aligner].bam_align_fn
    # Without this check an aligner lacking BAM support fails with an opaque
    # "'NoneType' object is not callable" TypeError.
    if align_fn is None:
        raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
    return align_fn(fastq1, align_ref, names, align_dir, config)
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, config):
    """Run the BAM alignment function registered for `aligner`.

    The input is quality binned first (into ``<align_dir>/qualbin``) when the
    ``quality_bin`` algorithm setting is "prealignment" or a list containing
    it. The result of the aligner's ``bam_align_fn`` is returned unchanged.

    Raises NotImplementedError when the aligner has no ``bam_align_fn``.
    """
    bin_setting = config["algorithm"].get("quality_bin")
    bin_first = (bin_setting == "prealignment" or
                 (isinstance(bin_setting, list) and "prealignment" in bin_setting))
    if bin_first:
        qualbin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, qualbin_dir, config)

    # The aligner lookup happens after binning, as in the original ordering.
    bam_aligner = _tools[aligner].bam_align_fn
    if bam_aligner is not None:
        return bam_aligner(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, data):
    """Run the BAM alignment function registered in ``TOOLS`` for `aligner`.

    Split alignments (a truthy ``align_split`` entry in `data`) are rejected
    with an assertion. The configuration is read from ``data["config"]``; when
    its ``quality_bin`` algorithm setting is "prealignment" or a list
    containing it, the input is quality binned into ``<align_dir>/qualbin``
    before alignment.

    The aligner is called with the configuration (not `data`) and its result
    is returned unchanged.

    Raises NotImplementedError when the aligner has no ``bam_align_fn``.
    """
    assert not data.get("align_split"), "Do not handle split alignments with BAM yet"
    config = data["config"]

    bin_setting = config["algorithm"].get("quality_bin")
    bin_first = (bin_setting == "prealignment" or
                 (isinstance(bin_setting, list) and "prealignment" in bin_setting))
    if bin_first:
        qualbin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, qualbin_dir, config)

    bam_aligner = TOOLS[aligner].bam_align_fn
    if bam_aligner is not None:
        return bam_aligner(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
def split_read_files(fastq1, fastq2, item, split_size, out_dir, dirs, config):
    """Split input reads for parallel processing, dispatching on input type.

    A lone ``.bam`` input (no second file) is handed to ``split_bam_file``,
    after optional quality binning when the ``quality_bin`` algorithm setting
    is "prealignment" or a list containing it. Everything else is handed to
    ``split_fastq_files``.

    `dirs` is accepted but not used here.
    """
    single_bam_input = fastq1.endswith(".bam") and fastq2 is None
    if not single_bam_input:
        return split_fastq_files(fastq1, fastq2, split_size, out_dir, config)

    bin_setting = config["algorithm"].get("quality_bin")
    bin_first = (bin_setting == "prealignment" or
                 (isinstance(bin_setting, list) and "prealignment" in bin_setting))
    if bin_first:
        # Binned output goes in its own directory under the split output directory.
        qualbin_dir = utils.safe_makedir(os.path.join(out_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, item["sam_ref"], qualbin_dir, config)
    return split_bam_file(fastq1, split_size, out_dir, config)
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, config):
    """Run the BAM alignment function registered in ``TOOLS`` for `aligner`.

    When the ``quality_bin`` algorithm setting is "prealignment" or a list
    containing it, the input is quality binned into ``<align_dir>/qualbin``
    first. The result of the aligner's ``bam_align_fn`` is returned unchanged.

    Raises NotImplementedError when the aligner has no ``bam_align_fn``.
    """
    bin_setting = config["algorithm"].get("quality_bin")
    wants_prebin = (bin_setting == "prealignment" or
                    (isinstance(bin_setting, list) and "prealignment" in bin_setting))
    if wants_prebin:
        bin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, bin_dir, config)

    # Support is checked only after any binning, as in the original ordering.
    bam_aligner = TOOLS[aligner].bam_align_fn
    if bam_aligner is not None:
        return bam_aligner(fastq1, align_ref, names, align_dir, config)
    raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)
def _align_from_bam(fastq1, aligner, align_ref, sam_ref, names, align_dir, data):
    """Run the BAM alignment function for `aligner` and return a data dictionary.

    Split alignments (a truthy ``align_split`` entry in `data`) are rejected
    with an assertion. When the ``quality_bin`` algorithm setting in
    ``data["config"]`` is "prealignment" or a list containing it, the input is
    quality binned into ``<align_dir>/qualbin`` before alignment.

    The aligner is called with `data`. A dictionary result must contain
    ``work_bam`` and is returned as is; any other result is stored in
    ``data["work_bam"]`` and `data` is returned.

    Raises NotImplementedError when the aligner has no ``bam_align_fn``.
    """
    assert not data.get("align_split"), "Do not handle split alignments with BAM yet"
    config = data["config"]

    bin_setting = config["algorithm"].get("quality_bin")
    bin_first = (bin_setting == "prealignment" or
                 (isinstance(bin_setting, list) and "prealignment" in bin_setting))
    if bin_first:
        qualbin_dir = utils.safe_makedir(os.path.join(align_dir, "qualbin"))
        fastq1 = cram.illumina_qual_bin(fastq1, sam_ref, qualbin_dir, config)

    bam_aligner = TOOLS[aligner].bam_align_fn
    if bam_aligner is None:
        raise NotImplementedError("Do not yet support BAM alignment with %s" % aligner)

    result = bam_aligner(fastq1, align_ref, names, align_dir, data)
    if not isinstance(result, dict):
        # Non-dictionary results are recorded as the working BAM on `data`.
        data["work_bam"] = result
        return data
    assert "work_bam" in result
    return result
def write_recal_bam(data, region=None, out_file=None):
    """Step 2 of GATK recalibration -- use covariates to re-write output file.

    `out_file` defaults to the ``work_bam`` path with its extension replaced
    by ``-gatkrecal.bam``. The "nochr" region goes through
    ``write_nochr_reads``; all other regions go through ``_run_recal_bam``.

    If the ``quality_bin`` algorithm setting is True, "postrecal", or a list
    containing "postrecal", and the output has aligned reads, the output is
    quality binned and the binned file replaces it at the same path.

    Returns ``[data]`` with ``data["work_bam"]`` set to the output BAM.
    """
    config = data["config"]
    if out_file is None:
        out_file = "%s-gatkrecal.bam" % os.path.splitext(data["work_bam"])[0]
    logger.info("Writing recalibrated BAM for %s to %s" % (data["name"], out_file))

    if region != "nochr":
        out_bam = _run_recal_bam(data["work_bam"], data["prep_recal"], region,
                                 data["sam_ref"], out_file, config)
    else:
        out_bam = write_nochr_reads(data["work_bam"], out_file, data["config"])

    bin_setting = config["algorithm"].get("quality_bin", None)
    bin_requested = (bin_setting is True or bin_setting == "postrecal" or
                     (isinstance(bin_setting, list) and "postrecal" in bin_setting))
    # The aligned-read check only runs when binning was requested.
    if bin_requested and has_aligned_reads(out_bam):
        binned_bam = cram.illumina_qual_bin(out_bam, data["sam_ref"],
                                            os.path.dirname(out_bam), config)
        # Move the unbinned file aside, then move the binned file into its place.
        set_aside = out_bam + ".binned"
        shutil.move(out_bam, set_aside)
        shutil.move(binned_bam, out_bam)
        utils.save_diskspace(set_aside, "Quality binned to %s" % out_bam, config)

    data["work_bam"] = out_bam
    return [data]