Example #1
0
def _normalized_bam_coverage(name, bam_input, data):
    """Run bamCoverage from deeptools but produce normalized bigWig files.

    The bigWig is written next to ``bam_input`` as ``<name>.bw`` using CPM
    normalization. Mitochondrial chromosomes, when identified, are excluded
    from the normalization, and any ``options`` configured under the
    ``bamCoverage`` resources are appended to the command line.

    Args:
        name: Base name of the output file; also used in the log message.
        bam_input: Path to the input BAM file.
        data: Sample dictionary read through the ``dd`` accessors and
            ``data["config"]`` (presumably the bcbio sample dict -- confirm
            against the caller).

    Returns:
        Path to the bigWig file, or ``None`` when bamCoverage is not
        available.
    """
    size = bam.fasta.total_sequence_length(dd.get_ref_file(data))
    cores = dd.get_num_cores(data)
    try:
        bam_coverage = config_utils.get_program("bamCoverage", data)
    except config_utils.CmdNotFound:
        logger.info("No bamCoverage found, skipping bamCoverage.")
        return None

    bw_output = os.path.join(os.path.dirname(bam_input), "%s.bw" % name)
    if utils.file_exists(bw_output):
        return bw_output

    extra_flags = []
    toignore = get_mitochondrial_chroms(data)
    if toignore:
        extra_flags.append(f"--ignoreForNormalization {' '.join(toignore)}")
    resources = config_utils.get_resources("bamCoverage", data["config"])
    options = resources.get("options") if resources else None
    if options:
        extra_flags.extend(str(x) for x in options)

    with file_transaction(bw_output) as out_tx:
        # Write to the transactional path rather than the final one, so an
        # interrupted run cannot leave a truncated bigWig that would pass the
        # file_exists check above on the next invocation.
        cmd = (f"{bam_coverage} --bam {bam_input} --outFileName {out_tx} "
               f"--binSize 20 --effectiveGenomeSize {size} "
               f"--smoothLength 60 --extendReads 150 --centerReads -p {cores} "
               f"--normalizeUsing CPM {' '.join(extra_flags)}")
        do.run(cmd, "Run bamCoverage in %s" % name)
    return bw_output
Example #2
0
def run_ataqv(data):
    """Run ataqv quality control on an ATAC-seq sample.

    The metrics file is written to
    ``<work_dir>/qc/<sample_name>/ataqv/<sample_name>.ataqv.json.gz``.

    Args:
        data: Sample dictionary read through the ``dd`` accessors
            (presumably the bcbio sample dict -- confirm against the caller).

    Returns:
        Path to the ataqv metrics file, or ``None`` when the sample is not
        ATAC-seq, the peak or unfiltered BAM file is missing, or the ataqv
        executable cannot be found.
    """
    if dd.get_chip_method(data) != "atac":
        return None
    work_dir = dd.get_work_dir(data)
    sample_name = dd.get_sample_name(data)
    out_dir = os.path.join(work_dir, "qc", sample_name, "ataqv")
    peak_file = get_full_peaks(data)
    bam_file = get_unfiltered_bam(data)
    out_file = os.path.join(out_dir, sample_name + ".ataqv.json.gz")
    if not peak_file:
        logger.info(f"Full peak file for {sample_name} not found, skipping ataqv")
        return None
    if not bam_file:
        logger.info(f"Unfiltered BAM file for {sample_name} not found, skipping ataqv")
        return None
    if utils.file_exists(out_file):
        return out_file

    # Resolve the executable before generating any support files; a missing
    # program is reported as a skip rather than an unhandled CmdNotFound.
    try:
        ataqv = config_utils.get_program("ataqv", data)
    except config_utils.CmdNotFound:
        ataqv = None
    if not ataqv:
        logger.info("ataqv executable not found, skipping running ataqv.")
        return None

    tss_bed_file = os.path.join(out_dir, "TSS.bed")
    tss_bed_file = gtf.get_tss_bed(dd.get_gtf_file(data), tss_bed_file, data, padding=1000)
    autosomal_reference = os.path.join(out_dir, "autosomal.txt")
    autosomal_reference = _make_autosomal_reference_file(autosomal_reference, data)

    # The list of mitochondrial chromosomes may be empty; only pass the flag
    # when a name is available instead of failing with an IndexError.
    mito_chroms = chromhacks.get_mitochondrial_chroms(data)
    mito_flag = f"--mitochondrial-reference-name {mito_chroms[0]} " if mito_chroms else ""

    with file_transaction(out_file) as tx_out_file:
        # NOTE(review): the literal "None" looks like a placeholder for
        # ataqv's positional organism argument, with the autosomes supplied
        # via --autosomal-reference-file instead -- confirm against ataqv usage.
        cmd = (f"{ataqv} --peak-file {peak_file} --name {sample_name} --metrics-file {tx_out_file} "
               f"--tss-file {tss_bed_file} --autosomal-reference-file {autosomal_reference} "
               f"--ignore-read-groups {mito_flag}"
               f"None {bam_file}")
        message = f"Running ataqv on {sample_name}."
        do.run(cmd, message)
    return out_file
def remove_mitochondrial_reads(bam_file, data):
    """Create a copy of a BAM file restricted to non-mitochondrial chromosomes.

    The filtered file is written next to the input with a ``-noMito.bam``
    suffix and is reused if it already exists.

    Args:
        bam_file: Path to the input BAM file.
        data: Sample dictionary used to look up chromosome names, the
            samtools program and the core count.

    Returns:
        Path to the filtered BAM file, or ``bam_file`` unchanged when no
        mitochondrial chromosome is identified.
    """
    mito = get_mitochondrial_chroms(data)
    if not mito:
        # The f prefix belongs on the literal that holds the placeholder;
        # adjacent literals are only interpolated individually.
        logger.info(
            "Mitochondrial chromosome not identified, skipping removal of "
            f"mitochondrial reads from {bam_file}.")
        return bam_file
    nonmito = get_nonmitochondrial_chroms(data)
    nomito_bam = os.path.splitext(bam_file)[0] + "-noMito.bam"
    if utils.file_exists(nomito_bam):
        return nomito_bam
    samtools = config_utils.get_program("samtools", dd.get_config(data))
    # Keep reads by listing every non-mitochondrial chromosome as a region.
    # NOTE(review): region queries presumably need an indexed BAM -- confirm
    # the caller provides one.
    regions = " ".join(nonmito)
    num_cores = dd.get_num_cores(data)
    with file_transaction(nomito_bam) as tx_out_bam:
        cmd = (f"{samtools} view -bh -@ {num_cores} {bam_file} {regions} "
               f"> {tx_out_bam}")
        message = f"Removing mitochondrial reads on {','.join(mito)} from {bam_file}."
        do.run(cmd, message)
    return nomito_bam