Exemple #1
0
def priority_total_coverage(data, out_dir):
    """
    Calculate coverage at 10 depth intervals in the priority regions.

    Builds a ``depth region`` command through ``sambamba.make_command`` for
    the cleaned svprioritize BED file, using depth thresholds 10, 20, ...,
    100, and writes the result to
    ``<out_dir>/<sample>_priority_total_coverage.bed``.

    Returns an empty dict when there is no usable svprioritize BED file
    (not configured, missing on disk, or identified as a gene list by
    ``prioritize.is_gene_list``); otherwise returns the output file path.
    An existing output that is up to date with respect to both the cleaned
    BED and the BAM is reused without re-running the command.
    """
    from bcbio.structural import prioritize
    bed_file = dd.get_svprioritize(data)
    # Any one of these conditions makes the input unusable, so they must be
    # combined with `or`. With `and`, a configured-but-missing file slipped
    # past the guard and was handed to is_gene_list and the steps below.
    if (not bed_file or not file_exists(bed_file)
            or prioritize.is_gene_list(bed_file)):
        return {}
    in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
    cleaned_bed = clean_file(bed_file, data, prefix="svprioritize-")
    work_dir = safe_makedir(out_dir)
    sample = dd.get_sample_name(data)
    out_file = os.path.join(work_dir, sample + "_priority_total_coverage.bed")
    if utils.file_uptodate(out_file, cleaned_bed) and utils.file_uptodate(
            out_file, in_bam):
        return out_file
    cmdl = sambamba.make_command(
        data,
        "depth region",
        in_bam,
        cleaned_bed,
        depth_thresholds=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    with file_transaction(out_file) as tx_out_file:
        message = "Calculating region coverage of {bed_file} in {in_bam}"
        do.run(cmdl + " -o " + tx_out_file,
               message.format(bed_file=bed_file, in_bam=in_bam))
    logger.debug("Saved svprioritize coverage into " + out_file)
    return out_file
Exemple #2
0
def coverage_region_detailed_stats(data, out_dir):
    """
    Calculate coverage at different completeness cutoffs
    for the regions in the coverage option.

    Returns None when no coverage BED file is configured. Otherwise a
    ``depth region`` command built by ``sambamba.make_command`` (depth
    thresholds 1-100, ``max_cov=1000``) is written to
    ``<sample>_coverage.bed`` inside ``out_dir``, the result is passed
    through ``_add_high_covered_regions`` and ``_calculate_percentiles``,
    and the absolute path of the final file is returned.
    """
    bed_file = dd.get_coverage(data)
    if not bed_file:
        return None
    work_dir = safe_makedir(out_dir)
    cleaned_bed = clean_file(bed_file, data, prefix="cov-", simple=True)

    # File names below are relative: everything runs inside work_dir.
    with chdir(work_dir):
        in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
        sample = dd.get_sample_name(data)
        logger.debug("doing coverage for %s" % sample)
        parse_file = sample + "_coverage.bed"
        is_current = (utils.file_uptodate(parse_file, cleaned_bed)
                      and utils.file_uptodate(parse_file, in_bam))
        if not is_current:
            with file_transaction(parse_file) as out_tx:
                cmdl = sambamba.make_command(data, "depth region", in_bam, cleaned_bed,
                                             depth_thresholds=[1, 5, 10, 20, 40, 50, 60, 70, 80, 100],
                                             max_cov=1000)
                # Rewrite the "# chrom" header text to "chrom" in the output.
                cmdl += " | sed 's/# chrom/chrom/' > " + out_tx
                do.run(cmdl, "Run coverage regional analysis for {}".format(sample))
        parse_file = _add_high_covered_regions(parse_file, cleaned_bed, sample)
        parse_file = _calculate_percentiles(os.path.abspath(parse_file), sample)
    # NOTE(review): this runs after leaving work_dir, so it is only correct
    # if _calculate_percentiles returns an absolute path -- confirm.
    return os.path.abspath(parse_file)
Exemple #3
0
def coverage_region_detailed_stats(data, out_dir, extra_cutoffs=None):
    """
    Calculate coverage at different completeness cutoffs
    for the regions in the coverage option.

    ``extra_cutoffs`` is an optional iterable of additional depth
    thresholds; they are merged with the built-in cutoffs for the sambamba
    command. Note that only the built-in cutoffs are forwarded to
    ``_calculate_percentiles``.

    Returns an empty list when no coverage BED file is configured or the
    file does not exist; otherwise a list with the absolute paths of the
    files returned by ``_calculate_percentiles``.
    """
    bed_file = dd.get_coverage(data)
    if not bed_file or not utils.file_exists(bed_file):
        return []
    work_dir = safe_makedir(out_dir)
    cleaned_bed = clean_file(bed_file, data, prefix="cov-", simple=True)

    cutoffs = {1, 5, 10, 20, 50, 100, 250, 500, 1000, 5000, 10000, 50000}
    # The default extra_cutoffs=None cannot be used with the set union
    # operator (TypeError), so normalise it to a (possibly empty) set first.
    extra = set(extra_cutoffs) if extra_cutoffs else set()

    # File names below are relative: everything runs inside work_dir.
    with chdir(work_dir):
        in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
        sample = dd.get_sample_name(data)
        logger.debug("doing coverage for %s" % sample)
        parse_file = sample + "_coverage.bed"
        is_current = (utils.file_uptodate(parse_file, cleaned_bed)
                      and utils.file_uptodate(parse_file, in_bam))
        if not is_current:
            with file_transaction(data, parse_file) as out_tx:
                depth_thresholds = sorted(cutoffs | extra)
                cmdl = sambamba.make_command(data, "depth region", in_bam, cleaned_bed,
                                             depth_thresholds=depth_thresholds)
                # Rewrite the "# chrom" header text to "chrom" in the output.
                cmdl += " | sed 's/# chrom/chrom/' > " + out_tx
                do.run(cmdl, "Run coverage regional analysis for {}".format(sample))
        out_files = _calculate_percentiles(os.path.abspath(parse_file), sample, data=data, cutoffs=cutoffs)
    return [os.path.abspath(x) for x in out_files]
def coverage_region_detailed_stats(data, out_dir):
    """
    Calculate coverage at different completeness cutoffs
    for the regions in the coverage option.

    Returns None when no coverage BED file is configured. Otherwise a
    ``depth region`` command built by ``sambamba.make_command`` (depth
    thresholds 1-100, ``max_cov=1000``) is written to
    ``<sample>_coverage.bed`` inside ``out_dir``, the result is passed
    through ``_add_high_covered_regions`` and ``_calculate_percentiles``,
    and the absolute path of the final file is returned.
    """
    bed_file = dd.get_coverage(data)
    if not bed_file:
        return None
    work_dir = safe_makedir(out_dir)
    cleaned_bed = clean_file(bed_file, data, prefix="cov-", simple=True)

    # File names below are relative: everything runs inside work_dir.
    with chdir(work_dir):
        in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
        sample = dd.get_sample_name(data)
        logger.debug("doing coverage for %s" % sample)
        parse_file = sample + "_coverage.bed"
        is_current = (utils.file_uptodate(parse_file, cleaned_bed)
                      and utils.file_uptodate(parse_file, in_bam))
        if not is_current:
            with file_transaction(data, parse_file) as out_tx:
                cmdl = sambamba.make_command(data, "depth region", in_bam, cleaned_bed,
                                             depth_thresholds=[1, 5, 10, 20, 40, 50, 60, 70, 80, 100],
                                             max_cov=1000)
                # Rewrite the "# chrom" header text to "chrom" in the output.
                cmdl += " | sed 's/# chrom/chrom/' > " + out_tx
                do.run(cmdl, "Run coverage regional analysis for {}".format(sample))
        parse_file = _add_high_covered_regions(parse_file, cleaned_bed, sample, data=data)
        parse_file = _calculate_percentiles(os.path.abspath(parse_file), sample, data=data)
    # NOTE(review): this runs after leaving work_dir, so it is only correct
    # if _calculate_percentiles returns an absolute path -- confirm.
    return os.path.abspath(parse_file)
def regions_coverage(data, bed_file, bam_file, target_name):
    """Compute region depth for ``bed_file`` over ``bam_file``.

    The result is written to ``<target_name>_regions_depth.bed`` under the
    sample's ``coverage`` directory inside the work directory, and that path
    is returned. An output that is already up to date with respect to both
    the BAM and the BED file is reused as is.
    """
    sample_dir = os.path.join(dd.get_work_dir(data), "coverage", dd.get_sample_name(data))
    work_dir = utils.safe_makedir(sample_dir)
    out_file = os.path.join(work_dir, target_name + "_regions_depth.bed")

    needs_run = not all(utils.file_uptodate(out_file, dep) for dep in (bam_file, bed_file))
    if needs_run:
        with file_transaction(data, out_file) as tx_out_file:
            depth_cmd = sambamba.make_command(data, "depth region", bam_file, bed_file)
            depth_cmd = depth_cmd + " -o " + tx_out_file
            description = "Calculating regions coverage of {target_name} in {bam_file}".format(
                target_name=target_name, bam_file=bam_file)
            do.run(depth_cmd, description)
    return out_file
Exemple #6
0
def regions_coverage(data, bed_file, bam_file, target_name):
    """Compute region depth for ``bed_file`` over ``bam_file``.

    Writes ``<target_name>_regions_depth.bed`` into the sample's
    ``coverage`` directory under the work directory and returns its path.
    The command is skipped when that file is already up to date relative to
    both inputs.
    """
    coverage_dir = os.path.join(dd.get_work_dir(data), "coverage", dd.get_sample_name(data))
    work_dir = utils.safe_makedir(coverage_dir)
    out_file = os.path.join(work_dir, target_name + "_regions_depth.bed")

    inputs = (bam_file, bed_file)
    if all(utils.file_uptodate(out_file, dep) for dep in inputs):
        return out_file

    with file_transaction(out_file) as tx_out_file:
        base_cmd = sambamba.make_command(data, "depth region", bam_file, bed_file)
        full_cmd = base_cmd + " -o " + tx_out_file
        template = "Calculating regions coverage of {target_name} in {bam_file}"
        do.run(full_cmd, template.format(target_name=target_name, bam_file=bam_file))
    return out_file
Exemple #7
0
def regions_coverage(data, bed_file, bam_file, target_name, depth_thresholds=None):
    """Generate coverage over regions of interest using sambamba depth.

    sambamba can segfault with multiple threads, so when the first run fails
    with ``subprocess.CalledProcessError`` the command is rebuilt with
    ``multicore=False`` and run once more as a single threaded backup.

    Returns the path ``<target_name>_regions_depth.bed`` in the sample's
    ``coverage`` directory; an up-to-date existing file is reused.
    """
    sample_dir = os.path.join(dd.get_work_dir(data), "coverage", dd.get_sample_name(data))
    work_dir = utils.safe_makedir(sample_dir)
    out_file = os.path.join(work_dir, target_name + "_regions_depth.bed")

    if all(utils.file_uptodate(out_file, dep) for dep in (bam_file, bed_file)):
        return out_file

    with file_transaction(data, out_file) as tx_out_file:

        def _run_depth(template, **make_kwargs):
            # Build the sambamba command for this attempt and execute it.
            depth_cmd = sambamba.make_command(data, "depth region", bam_file, bed_file,
                                              depth_thresholds=depth_thresholds, **make_kwargs)
            depth_cmd += " -o " + tx_out_file
            do.run(depth_cmd, template.format(target_name=target_name, bam_file=bam_file))

        try:
            _run_depth("Calculating regions coverage of {target_name} in {bam_file}")
        except subprocess.CalledProcessError:
            _run_depth("Calculating regions coverage of {target_name} in {bam_file} -- single thread backup",
                       multicore=False)
    return out_file
def priority_coverage(data, out_dir):
    """Calculate per-base coverage over the svprioritize regions.

    Builds a ``depth base`` command through ``sambamba.make_command`` for
    the cleaned svprioritize BED file, reshapes the output with awk/sed and
    writes it to ``<out_dir>/<sample>_priority_depth.bed``.

    Returns ``data`` unchanged when there is no usable svprioritize BED file
    (not configured, missing on disk, or identified as a gene list by
    ``prioritize.is_gene_list``); otherwise returns the output file path.
    An existing output that is up to date with respect to both the cleaned
    BED and the BAM is reused.
    """
    from bcbio.structural import prioritize
    bed_file = dd.get_svprioritize(data)
    if not bed_file or not file_exists(bed_file) or prioritize.is_gene_list(bed_file):
        return data

    work_dir = safe_makedir(out_dir)
    sample = dd.get_sample_name(data)
    cleaned_bed = clean_file(bed_file, data, prefix="cov-", simple=True)
    out_file = os.path.join(work_dir, sample + "_priority_depth.bed")
    in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
    if utils.file_uptodate(out_file, cleaned_bed) and utils.file_uptodate(out_file, in_bam):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        cmdl = sambamba.make_command(data, "depth base", in_bam, cleaned_bed)
        # Print columns 1, 2, 2, 3 and 10 tab-separated; sed drops line one.
        parse_cmd = "awk '{print $1\"\t\"$2\"\t\"$2\"\t\"$3\"\t\"$10}' | sed '1d'"
        # Concatenate rather than str.format() the whole pipeline: the
        # sambamba command embeds file paths, and a literal brace in any of
        # them would be treated as a replacement field.
        cmdl = cmdl + " | " + parse_cmd + " > " + tx_out_file
        message = "Calculating base coverage of {bed_file} in {in_bam}"
        do.run(cmdl, message.format(bed_file=bed_file, in_bam=in_bam))
    return out_file
Exemple #9
0
def priority_coverage(data, out_dir):
    """Calculate per-base coverage over the svprioritize regions.

    Builds a ``depth base`` command through ``sambamba.make_command`` for
    the cleaned svprioritize BED file, reshapes the output with awk/sed and
    writes it to ``<out_dir>/<sample>_priority_depth.bed``.

    Returns ``data`` unchanged when there is no usable svprioritize BED file
    (not configured, missing on disk, or identified as a gene list by
    ``prioritize.is_gene_list``); otherwise returns the output file path.
    An existing output that is up to date with respect to both the cleaned
    BED and the BAM is reused.
    """
    from bcbio.structural import prioritize
    bed_file = dd.get_svprioritize(data)
    if not bed_file or not file_exists(bed_file) or prioritize.is_gene_list(bed_file):
        return data

    work_dir = safe_makedir(out_dir)
    sample = dd.get_sample_name(data)
    cleaned_bed = clean_file(bed_file, data, prefix="cov-", simple=True)
    out_file = os.path.join(work_dir, sample + "_priority_depth.bed")
    in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
    if utils.file_uptodate(out_file, cleaned_bed) and utils.file_uptodate(out_file, in_bam):
        return out_file
    with file_transaction(out_file) as tx_out_file:
        cmdl = sambamba.make_command(data, "depth base", in_bam, cleaned_bed)
        # Print columns 1, 2, 2, 3 and 10 tab-separated; sed drops line one.
        parse_cmd = "awk '{print $1\"\t\"$2\"\t\"$2\"\t\"$3\"\t\"$10}' | sed '1d'"
        # Concatenate rather than str.format() the whole pipeline: the
        # sambamba command embeds file paths, and a literal brace in any of
        # them would be treated as a replacement field.
        cmdl = cmdl + " | " + parse_cmd + " > " + tx_out_file
        message = "Calculating base coverage of {bed_file} in {in_bam}"
        do.run(cmdl, message.format(bed_file=bed_file, in_bam=in_bam))
    return out_file
Exemple #10
0
def priority_total_coverage(data, out_dir):
    """
    Calculate coverage at 10 depth intervals in the priority regions.

    Builds a ``depth region`` command through ``sambamba.make_command`` for
    the cleaned svprioritize BED file, using depth thresholds 10, 20, ...,
    100, and writes the result to
    ``<out_dir>/<sample>_priority_total_coverage.bed``.

    Returns an empty dict when there is no usable svprioritize BED file
    (not configured, missing on disk, or identified as a gene list by
    ``prioritize.is_gene_list``); otherwise returns the output file path.
    An existing output that is up to date with respect to both the cleaned
    BED and the BAM is reused without re-running the command.
    """
    from bcbio.structural import prioritize
    bed_file = dd.get_svprioritize(data)
    # Any one of these conditions makes the input unusable, so they must be
    # combined with `or`. With `and`, a configured-but-missing file slipped
    # past the guard and was handed to is_gene_list and the steps below.
    if not bed_file or not file_exists(bed_file) or prioritize.is_gene_list(bed_file):
        return {}
    in_bam = dd.get_align_bam(data) or dd.get_work_bam(data)
    cleaned_bed = clean_file(bed_file, data, prefix="svprioritize-")
    work_dir = safe_makedir(out_dir)
    sample = dd.get_sample_name(data)
    out_file = os.path.join(work_dir, sample + "_priority_total_coverage.bed")
    if utils.file_uptodate(out_file, cleaned_bed) and utils.file_uptodate(out_file, in_bam):
        return out_file
    cmdl = sambamba.make_command(data, "depth region", in_bam, cleaned_bed,
                                 depth_thresholds=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    with file_transaction(out_file) as tx_out_file:
        message = "Calculating region coverage of {bed_file} in {in_bam}"
        do.run(cmdl + " -o " + tx_out_file,
               message.format(bed_file=bed_file, in_bam=in_bam))
    logger.debug("Saved svprioritize coverage into " + out_file)
    return out_file