def calculate_sv_coverage(data):
    """Compute per-bin coverage used by downstream CNV calling.

    Produces target and antitarget coverage files (``.cnn``) under
    ``<work_dir>/structural/<sample>/bins`` and, when the target file is
    present on disk, records both paths in ``data["depth"]["bins"]``.

    Coverage is only (re)computed when at least one of the two output files
    is missing according to ``utils.file_exists`` and the sample has an
    aligned or work BAM. Samples for which
    ``cnvkit.use_general_sv_bins`` is false are returned unchanged.

    Returns the sample wrapped as ``[[data]]``.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate, cnvkit

    data = utils.to_single_data(data)
    if not cnvkit.use_general_sv_bins(data):
        return [[data]]

    bins_dir = os.path.join(dd.get_work_dir(data), "structural",
                            dd.get_sample_name(data), "bins")
    work_dir = utils.safe_makedir(bins_dir)
    target_cnn = os.path.join(
        work_dir, "%s-target-coverage.cnn" % dd.get_sample_name(data))
    anti_cnn = os.path.join(
        work_dir, "%s-antitarget-coverage.cnn" % dd.get_sample_name(data))

    outputs_ready = utils.file_exists(target_cnn) and utils.file_exists(anti_cnn)
    if not outputs_ready and (dd.get_align_bam(data) or dd.get_work_bam(data)):
        # mosdepth coverage over the target and antitarget bin regions
        target_bins = tz.get_in(["regions", "bins", "target"], data)
        target_depth = coverage.run_mosdepth(data, "target", target_bins)
        anti_bins = tz.get_in(["regions", "bins", "antitarget"], data)
        anti_depth = coverage.run_mosdepth(data, "antitarget", anti_bins)
        # Attach gene names to each region; max_distance=0 is passed through
        # to annotate.add_genes unchanged.
        target_annotated = annotate.add_genes(target_depth.regions, data, max_distance=0)
        anti_annotated = annotate.add_genes(anti_depth.regions, data, max_distance=0)
        target_cnn = _add_log2_depth(target_annotated, target_cnn, data)
        anti_cnn = _add_log2_depth(anti_annotated, anti_cnn, data)
        # TODO: Correct for GC bias

    # Only the target file is checked before publishing both paths.
    if os.path.exists(target_cnn):
        data["depth"]["bins"] = {"target": target_cnn,
                                 "antitarget": anti_cnn}
    return [[data]]
def _calculate_sv_coverage_cnvkit(data, work_dir):
    """Calculate coverage in a CNVkit ready format using mosdepth.

    Builds the expected target and antitarget ``.cnn`` paths inside
    ``work_dir``. If both already exist (per ``utils.file_exists``), or the
    sample has neither an aligned nor a work BAM, those paths are returned
    without running anything. Otherwise mosdepth is run over the target and
    antitarget bins, both region sets are annotated with genes, and each is
    passed to ``_add_log2_depth``.

    Returns a ``(target_file, antitarget_file)`` tuple.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    target_cnn = os.path.join(
        work_dir, "%s-target-coverage.cnn" % dd.get_sample_name(data))
    anti_cnn = os.path.join(
        work_dir, "%s-antitarget-coverage.cnn" % dd.get_sample_name(data))

    # Nothing to do when outputs are already in place or there is no BAM.
    outputs_ready = utils.file_exists(target_cnn) and utils.file_exists(anti_cnn)
    if outputs_ready or not (dd.get_align_bam(data) or dd.get_work_bam(data)):
        return target_cnn, anti_cnn

    target_bins = tz.get_in(["regions", "bins", "target"], data)
    target_depth = coverage.run_mosdepth(data, "target", target_bins)
    anti_bins = tz.get_in(["regions", "bins", "antitarget"], data)
    anti_depth = coverage.run_mosdepth(data, "antitarget", anti_bins)

    target_annotated = annotate.add_genes(target_depth.regions, data, max_distance=0)
    anti_annotated = annotate.add_genes(anti_depth.regions, data, max_distance=0)

    target_cnn = _add_log2_depth(target_annotated, target_cnn, data)
    anti_cnn = _add_log2_depth(anti_annotated, anti_cnn, data)
    return target_cnn, anti_cnn
def _calculate_sv_coverage_cnvkit(data, work_dir):
    """Calculate coverage in a CNVkit ready format using mosdepth.

    Builds the expected target and antitarget ``.cnn`` paths inside
    ``work_dir``. If both already exist (per ``utils.file_exists``), or the
    sample has neither an aligned nor a work BAM, those paths are returned
    without running anything. Otherwise mosdepth is run over the target and
    antitarget bins; only the target regions are annotated with genes, while
    the antitarget regions go to ``_add_log2_depth`` as produced by mosdepth.

    Returns a ``(target_file, antitarget_file)`` tuple.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    sample_target_name = "%s-target-coverage.cnn" % dd.get_sample_name(data)
    target_cnn = os.path.join(work_dir, sample_target_name)
    sample_anti_name = "%s-antitarget-coverage.cnn" % dd.get_sample_name(data)
    anti_cnn = os.path.join(work_dir, sample_anti_name)

    # Nothing to do when outputs are already in place or there is no BAM.
    outputs_ready = utils.file_exists(target_cnn) and utils.file_exists(anti_cnn)
    if outputs_ready or not (dd.get_align_bam(data) or dd.get_work_bam(data)):
        return target_cnn, anti_cnn

    target_bins = tz.get_in(["regions", "bins", "target"], data)
    target_depth = coverage.run_mosdepth(data, "target", target_bins)
    anti_bins = tz.get_in(["regions", "bins", "antitarget"], data)
    anti_depth = coverage.run_mosdepth(data, "antitarget", anti_bins)

    # Gene annotation is applied to the target regions only.
    target_annotated = annotate.add_genes(target_depth.regions, data, max_distance=0)

    target_cnn = _add_log2_depth(target_annotated, target_cnn, data)
    anti_cnn = _add_log2_depth(anti_depth.regions, anti_cnn, data)
    return target_cnn, anti_cnn
def _calculate_sv_coverage_gatk(data, work_dir):
    """Calculate coverage in defined regions using GATK tools.

    Returns a ``(read_counts, annotated_regions)`` pair: the result of
    ``gatkcnv.collect_read_counts`` and the mosdepth target regions after
    gene annotation.

    TODO: This does the calculation twice -- once to get GATK4 compatible
    HDF read counts, and again for depth and gene annotations. Both are
    needed for creating heterogeneity inputs. Ideally replace with a single
    mosdepth coverage calculation, and create the GATK4 TSV format:

        CONTIG  START   END     COUNT
        chrM    1       1000    13268
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    # GATK compatible read counts
    read_counts = gatkcnv.collect_read_counts(data, work_dir)

    # Heterogeneity compatible depth: clean the target bins, then run
    # mosdepth on the cleaned file and annotate the regions with genes.
    raw_target_bins = tz.get_in(["regions", "bins", "target"], data)
    cleaned_bins = bedutils.clean_file(raw_target_bins, data, bedprep_dir=work_dir)
    target_depth = coverage.run_mosdepth(data, "target-gatk", cleaned_bins)
    annotated_regions = annotate.add_genes(target_depth.regions, data, max_distance=0)

    return read_counts, annotated_regions
def _calculate_sv_coverage_gatk(data, work_dir):
    """Calculate coverage in defined regions using GATK tools.

    Returns a ``(read_counts, annotated_regions)`` pair: the result of
    ``gatkcnv.collect_read_counts`` and the mosdepth target regions after
    gene annotation.

    TODO: This does the calculation twice -- once to get GATK4 compatible
    HDF read counts, and again for depth and gene annotations. Both are
    needed for creating heterogeneity inputs. Ideally replace with a single
    mosdepth coverage calculation, and create the GATK4 TSV format:

        CONTIG  START   END     COUNT
        chrM    1       1000    13268
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    # GATK compatible read counts
    read_counts = gatkcnv.collect_read_counts(data, work_dir)

    # Heterogeneity compatible depth: clean the target bins, then run
    # mosdepth on the cleaned file and annotate the regions with genes.
    raw_target_bins = tz.get_in(["regions", "bins", "target"], data)
    cleaned_bins = bedutils.clean_file(raw_target_bins, data, bedprep_dir=work_dir)
    target_depth = coverage.run_mosdepth(data, "target-gatk", cleaned_bins)
    annotated_regions = annotate.add_genes(target_depth.regions, data, max_distance=0)

    return read_counts, annotated_regions