Esempio n. 1
0
def calculate_sv_coverage(data):
    """Calculate coverage within bins for downstream CNV calling.

    Creates corrected cnr files with log2 ratios and depths.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate, cnvkit
    data = utils.to_single_data(data)
    if not cnvkit.use_general_sv_bins(data):
        return [[data]]
    work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "structural",
                                               dd.get_sample_name(data), "bins"))
    out_target_file = os.path.join(work_dir, "%s-target-coverage.cnn" % dd.get_sample_name(data))
    out_anti_file = os.path.join(work_dir, "%s-antitarget-coverage.cnn" % dd.get_sample_name(data))
    if ((not utils.file_exists(out_target_file) or not utils.file_exists(out_anti_file))
          and (dd.get_align_bam(data) or dd.get_work_bam(data))):
        # mosdepth
        target_cov = coverage.run_mosdepth(data, "target", tz.get_in(["regions", "bins", "target"], data))
        anti_cov = coverage.run_mosdepth(data, "antitarget", tz.get_in(["regions", "bins", "antitarget"], data))
        target_cov_genes = annotate.add_genes(target_cov.regions, data, max_distance=0)
        anti_cov_genes = annotate.add_genes(anti_cov.regions, data, max_distance=0)
        out_target_file = _add_log2_depth(target_cov_genes, out_target_file, data)
        out_anti_file = _add_log2_depth(anti_cov_genes, out_anti_file, data)
        # TODO: Correct for GC bias
    if os.path.exists(out_target_file):
        data["depth"]["bins"] = {"target": out_target_file, "antitarget": out_anti_file}
    return [[data]]
Esempio n. 2
0
def _calculate_sv_coverage_cnvkit(data, work_dir):
    """Calculate coverage in an CNVkit ready format using mosdepth.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate
    out_target_file = os.path.join(
        work_dir, "%s-target-coverage.cnn" % dd.get_sample_name(data))
    out_anti_file = os.path.join(
        work_dir, "%s-antitarget-coverage.cnn" % dd.get_sample_name(data))
    if ((not utils.file_exists(out_target_file)
         or not utils.file_exists(out_anti_file))
            and (dd.get_align_bam(data) or dd.get_work_bam(data))):
        target_cov = coverage.run_mosdepth(
            data, "target", tz.get_in(["regions", "bins", "target"], data))
        anti_cov = coverage.run_mosdepth(
            data, "antitarget",
            tz.get_in(["regions", "bins", "antitarget"], data))
        target_cov_genes = annotate.add_genes(target_cov.regions,
                                              data,
                                              max_distance=0)
        anti_cov_genes = annotate.add_genes(anti_cov.regions,
                                            data,
                                            max_distance=0)
        out_target_file = _add_log2_depth(target_cov_genes, out_target_file,
                                          data)
        out_anti_file = _add_log2_depth(anti_cov_genes, out_anti_file, data)
    return out_target_file, out_anti_file
Esempio n. 3
0
def _calculate_sv_coverage_cnvkit(data, work_dir):
    """Calculate coverage in an CNVkit ready format using mosdepth.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate
    out_target_file = os.path.join(work_dir, "%s-target-coverage.cnn" % dd.get_sample_name(data))
    out_anti_file = os.path.join(work_dir, "%s-antitarget-coverage.cnn" % dd.get_sample_name(data))
    if ((not utils.file_exists(out_target_file) or not utils.file_exists(out_anti_file)) and
          (dd.get_align_bam(data) or dd.get_work_bam(data))):
        target_cov = coverage.run_mosdepth(data, "target", tz.get_in(["regions", "bins", "target"], data))
        anti_cov = coverage.run_mosdepth(data, "antitarget", tz.get_in(["regions", "bins", "antitarget"], data))
        target_cov_genes = annotate.add_genes(target_cov.regions, data, max_distance=0)
        out_target_file = _add_log2_depth(target_cov_genes, out_target_file, data)
        out_anti_file = _add_log2_depth(anti_cov.regions, out_anti_file, data)
    return out_target_file, out_anti_file
Esempio n. 4
0
def _calculate_sv_coverage_gatk(data, work_dir):
    """Calculate coverage in defined regions using GATK tools

    TODO: This does double calculations to get GATK4 compatible HDF read counts
    and then depth and gene annotations. Both are needed for creating heterogeneity inputs.
    Ideally replace with a single mosdepth coverage calculation, and creat GATK4 TSV format:

    CONTIG  START   END     COUNT
    chrM    1       1000    13268
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate
    # GATK compatible
    target_file = gatkcnv.collect_read_counts(data, work_dir)
    # heterogeneity compatible
    target_in = bedutils.clean_file(tz.get_in(["regions", "bins", "target"], data), data, bedprep_dir=work_dir)
    target_cov = coverage.run_mosdepth(data, "target-gatk", target_in)
    target_cov_genes = annotate.add_genes(target_cov.regions, data, max_distance=0)
    return target_file, target_cov_genes
Esempio n. 5
0
def _calculate_sv_coverage_gatk(data, work_dir):
    """Calculate coverage in defined regions using GATK tools

    TODO: This does double calculations to get GATK4 compatible HDF read counts
    and then depth and gene annotations. Both are needed for creating heterogeneity inputs.
    Ideally replace with a single mosdepth coverage calculation, and creat GATK4 TSV format:

    CONTIG  START   END     COUNT
    chrM    1       1000    13268
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate
    # GATK compatible
    target_file = gatkcnv.collect_read_counts(data, work_dir)
    # heterogeneity compatible
    target_in = bedutils.clean_file(tz.get_in(["regions", "bins", "target"], data), data, bedprep_dir=work_dir)
    target_cov = coverage.run_mosdepth(data, "target-gatk", target_in)
    target_cov_genes = annotate.add_genes(target_cov.regions, data, max_distance=0)
    return target_file, target_cov_genes