def _normalized_bam_coverage(name, bam_input, data):
    """Run bamCoverage from deeptools but produce normalized bigWig files.

    The bigWig is written next to ``bam_input`` as ``<name>.bw`` using CPM
    normalization. Mitochondrial chromosomes, when identified, are passed to
    ``--ignoreForNormalization``. Extra command line options configured under
    the ``bamCoverage`` resources are appended to the command.

    Args:
        name: Base name (without extension) of the bigWig file to create.
        bam_input: Path to the BAM file to compute coverage from.
        data: Sample data dictionary; must contain a ``"config"`` entry.

    Returns:
        Path to the bigWig file, or None when bamCoverage is not available.
    """
    try:
        bam_coverage = config_utils.get_program("bamCoverage", data)
    except config_utils.CmdNotFound:
        logger.info("No bamCoverage found, skipping bamCoverage.")
        return None

    bw_output = os.path.join(os.path.dirname(bam_input), "%s.bw" % name)
    # Check for an existing result before doing any further work (the genome
    # size calculation in particular is not needed for a finished output).
    if utils.file_exists(bw_output):
        return bw_output

    size = bam.fasta.total_sequence_length(dd.get_ref_file(data))
    cores = dd.get_num_cores(data)

    extra_args = ["--normalizeUsing CPM"]
    toignore = get_mitochondrial_chroms(data)
    if toignore:
        extra_args.append(f"--ignoreForNormalization {' '.join(toignore)}")
    resources = config_utils.get_resources("bamCoverage", data["config"])
    if resources:
        options = resources.get("options")
        if options:
            extra_args.extend(str(x) for x in options)

    with file_transaction(bw_output) as out_tx:
        # Write to the transactional path so an interrupted run never leaves a
        # partial file at bw_output that a later run would accept as complete.
        # The command is assembled directly rather than via str.format so that
        # braces in user-supplied options cannot break the formatting.
        cmd = " ".join(
            [f"{bam_coverage} --bam {bam_input} --outFileName {out_tx}",
             f"--binSize 20 --effectiveGenomeSize {size}",
             f"--smoothLength 60 --extendReads 150 --centerReads -p {cores}"]
            + extra_args)
        do.run(cmd, "Run bamCoverage in %s" % name)
    return bw_output
def run_ataqv(data):
    """Run ataqv on an ATAC-seq sample and return the metrics file.

    The metrics are written to ``<work_dir>/qc/<sample>/ataqv/`` as
    ``<sample>.ataqv.json.gz``. A TSS BED file and an autosomal reference
    file are generated in the same directory as inputs for ataqv.

    Args:
        data: Sample data dictionary.

    Returns:
        Path to the ataqv metrics file, or None when the sample is not ATAC,
        the full peak file or unfiltered BAM is missing, or the ataqv
        executable cannot be found.
    """
    if dd.get_chip_method(data) != "atac":
        return None
    work_dir = dd.get_work_dir(data)
    sample_name = dd.get_sample_name(data)
    out_dir = os.path.join(work_dir, "qc", sample_name, "ataqv")
    peak_file = get_full_peaks(data)
    bam_file = get_unfiltered_bam(data)
    out_file = os.path.join(out_dir, sample_name + ".ataqv.json.gz")
    if not peak_file:
        logger.info(f"Full peak file for {sample_name} not found, skipping ataqv")
        return None
    if not bam_file:
        logger.info(f"Unfiltered BAM file for {sample_name} not found, skipping ataqv")
        return None
    if utils.file_exists(out_file):
        return out_file

    # Resolve the executable before generating any auxiliary files so nothing
    # is built when ataqv cannot run. The lookup may raise CmdNotFound (as it
    # is handled for bamCoverage elsewhere in this file) or return a falsy value.
    try:
        ataqv = config_utils.get_program("ataqv", data)
    except config_utils.CmdNotFound:
        ataqv = None
    if not ataqv:
        logger.info("ataqv executable not found, skipping running ataqv.")
        return None

    tss_bed_file = os.path.join(out_dir, "TSS.bed")
    tss_bed_file = gtf.get_tss_bed(dd.get_gtf_file(data), tss_bed_file, data,
                                   padding=1000)
    autosomal_reference = os.path.join(out_dir, "autosomal.txt")
    autosomal_reference = _make_autosomal_reference_file(autosomal_reference, data)

    # Only name the mitochondrial reference when one was identified; indexing
    # an empty result would raise IndexError. Without the flag ataqv uses its
    # own default mitochondrial reference name.
    mito_chroms = chromhacks.get_mitochondrial_chroms(data)
    if mito_chroms:
        mito_flag = f"--mitochondrial-reference-name {mito_chroms[0]} "
    else:
        logger.info(f"Mitochondrial chromosome not identified for {sample_name}, "
                    "running ataqv with its default mitochondrial reference name.")
        mito_flag = ""

    with file_transaction(out_file) as tx_out_file:
        # NOTE(review): the literal "None" fills ataqv's positional organism
        # argument; the autosomes come from --autosomal-reference-file instead.
        # Confirm against the ataqv usage documentation before changing it.
        cmd = (f"{ataqv} --peak-file {peak_file} --name {sample_name} --metrics-file {tx_out_file} "
               f"--tss-file {tss_bed_file} --autosomal-reference-file {autosomal_reference} "
               f"--ignore-read-groups {mito_flag}"
               f"None {bam_file}")
        message = f"Running ataqv on {sample_name}."
        do.run(cmd, message)
    return out_file
def remove_mitochondrial_reads(bam_file, data):
    """Write a copy of a BAM file restricted to non-mitochondrial chromosomes.

    The output is placed next to the input with a ``-noMito.bam`` suffix and
    is reused if it already exists.

    Args:
        bam_file: Path to the input BAM file.
        data: Sample data dictionary.

    Returns:
        Path to the BAM file without mitochondrial reads, or ``bam_file``
        unchanged when no mitochondrial chromosome could be identified.
    """
    mito = get_mitochondrial_chroms(data)
    if not mito:
        # Both fragments must be f-strings, otherwise the {bam_file}
        # placeholder is logged literally instead of being filled in.
        logger.info(
            f"Mitochondrial chromosome not identified, skipping removal of "
            f"mitochondrial reads from {bam_file}.")
        return bam_file
    nonmito = get_nonmitochondrial_chroms(data)
    nomito_bam = os.path.splitext(bam_file)[0] + "-noMito.bam"
    if utils.file_exists(nomito_bam):
        return nomito_bam
    samtools = config_utils.get_program("samtools", dd.get_config(data))
    # The chromosomes to keep are passed to samtools view as region arguments.
    nonmito_flag = " ".join(nonmito)
    num_cores = dd.get_num_cores(data)
    with file_transaction(nomito_bam) as tx_out_bam:
        cmd = (f"{samtools} view -bh -@ {num_cores} {bam_file} {nonmito_flag} "
               f"> {tx_out_bam}")
        message = f"Removing mitochondrial reads on {','.join(mito)} from {bam_file}."
        do.run(cmd, message)
    return nomito_bam