def _run_coverage_qc(bam_file, data, out_dir):
    """Run the coverage QC steps for a sample.

    Invokes the priority, total-priority and general coverage calculations
    from ``cov`` with ``data`` and ``out_dir``. When both a problem region
    directory and a priority coverage result are available, the priority
    result is additionally passed to ``cov.decorate_problem_regions``.

    ``bam_file`` is accepted for interface compatibility but is not used
    here. Always returns None.
    """
    priority_result = cov.priority_coverage(data, out_dir)
    cov.priority_total_coverage(data, out_dir)
    cov.coverage(data, out_dir)

    problem_dir = dd.get_problem_region_dir(data)
    if not (problem_dir and priority_result):
        return None

    # The annotated output is not returned to the caller.
    cov.decorate_problem_regions(priority_result, problem_dir)
    return None
def _run_coverage_qc(bam_file, data, out_dir):
    """Run coverage QC analysis.

    Gathers read counts for ``bam_file`` via ``sambamba`` and derives
    percentage metrics (0-100 scale) from them, then triggers the priority
    and per-region coverage calculations in ``cov``.

    Args:
        bam_file: alignment file handed to the sambamba counting helpers.
        data: sample configuration passed through to every helper.
        out_dir: output directory handed to the ``cov`` helpers.

    Returns:
        dict mapping metric names to values. ``Total_reads`` and
        ``Mapped_reads`` are always present; ratio metrics are only added
        when their denominator is non-zero, and the target/coverage metrics
        are only computed when at least one read is mapped.
    """
    out = dict()

    total_reads = sambamba.number_of_reads(data, bam_file)
    out['Total_reads'] = total_reads
    mapped = sambamba.number_of_mapped_reads(data, bam_file)
    out['Mapped_reads'] = mapped
    if total_reads:
        out['Mapped_reads_pct'] = 100.0 * mapped / total_reads

    if mapped:
        mapped_unique = sambamba.number_of_mapped_reads(data, bam_file, keep_dups=False)
        # Report the duplicate-free count; previously the raw mapped count
        # was stored under this key by mistake.
        out['Mapped_unique_reads'] = mapped_unique
        mapped_dups = mapped - mapped_unique
        out['Duplicates'] = mapped_dups
        out['Duplicates_pct'] = 100.0 * mapped_dups / mapped

        # Everything below depends on mapped_unique, which only exists when
        # reads are mapped, so it stays inside this branch.
        if dd.get_coverage(data):
            cov_bed_file = clean_file(dd.get_coverage(data), data, prefix="cov-", simple=True)
            merged_bed_file = bedutils.merge_overlaps(cov_bed_file, data)
            target_name = "coverage"
        else:
            merged_bed_file = dd.get_variant_regions_merged(data)
            target_name = "variant_regions"

        ontarget = sambamba.number_mapped_reads_on_target(
            data, merged_bed_file, bam_file, keep_dups=False, target_name=target_name)
        if mapped_unique:
            out["Ontarget_unique_reads"] = ontarget
            out["Ontarget_pct"] = 100.0 * ontarget / mapped_unique
            out['Offtarget_pct'] = 100.0 * (mapped_unique - ontarget) / mapped_unique
            # NOTE(review): 200 is presumably a padding in base pairs; confirm
            # against bedutils.get_padded_bed_file.
            padded_bed_file = bedutils.get_padded_bed_file(merged_bed_file, 200, data)
            ontarget_padded = sambamba.number_mapped_reads_on_target(
                data, padded_bed_file, bam_file, keep_dups=False,
                target_name=target_name + "_padded")
            out["Ontarget_padded_pct"] = 100.0 * ontarget_padded / mapped_unique
        if total_reads:
            out['Usable_pct'] = 100.0 * ontarget / total_reads

        avg_coverage = get_average_coverage(data, bam_file, merged_bed_file, target_name)
        out['Avg_coverage'] = avg_coverage

    # These calls are made for their effects only; their results are not
    # part of the returned metrics.
    cov.priority_coverage(data, out_dir)
    cov.priority_total_coverage(data, out_dir)
    cov.coverage_region_detailed_stats(data, out_dir)
    # TODO: re-enable problem region decoration of the priority coverage once
    # annotations come from an internally installed problem region directory.

    return out
def _run_coverage_qc(bam_file, data, out_dir):
    """Run coverage QC analysis.

    Picks the target regions (coverage BED, merged variant regions, or none
    for the "wgs" case), reads the off-target statistics produced by
    ``calculate_offtarget_stats`` and turns them into rate metrics, adds the
    average coverage, and finally triggers the ``cov`` priority and
    per-region coverage calculations.

    Returns:
        dict of metrics. ``avg_coverage`` is always set; ``offtarget_rate``,
        ``Duplicates``, ``Duplicates_pct`` and ``usable_rate`` are added only
        when the stats file exists and the relevant counts are non-zero.
        Rates are fractions (scaled by 1.0, not 100).
    """
    metrics = {}

    # Choose the region file and the label describing where it came from.
    coverage_bed = dd.get_coverage(data)
    if coverage_bed:
        target_name = "coverage"
        regions = bedutils.merge_overlaps(coverage_bed, data)
    else:
        regions = dd.get_variant_regions_merged(data)
        if regions:
            target_name = "variant_regions"
        else:
            target_name = "wgs"
            regions = None
    regions = clean_file(regions, data, prefix="cov-", simple=True)

    stats_path = calculate_offtarget_stats(bam_file, data, regions, target_name)
    if stats_path and utils.file_exists(stats_path):
        with open(stats_path) as handle:
            counts = yaml.safe_load(handle)

        n_offtarget = counts.get('offtarget')
        n_unique = counts['mapped_unique']
        if n_offtarget and n_unique:
            metrics['offtarget_rate'] = 1.0 * n_offtarget / n_unique

        n_mapped = counts['mapped']
        if n_mapped:
            n_dups = n_mapped - n_unique
            metrics['Duplicates'] = n_dups
            metrics['Duplicates_pct'] = 1.0 * n_dups / n_mapped

        n_total = counts['total_reads']
        if n_total:
            metrics['usable_rate'] = 1.0 * (n_unique - n_offtarget) / n_total

    metrics['avg_coverage'] = get_average_coverage(data, bam_file, regions, target_name)

    # Results of these calls are not included in the returned metrics.
    cov.priority_coverage(data, out_dir)
    cov.priority_total_coverage(data, out_dir)
    cov.coverage_region_detailed_stats(data, out_dir)
    # Problem region decoration of the priority coverage is disabled until
    # annotations come from an internally installed problem region directory.

    return metrics
def coverage_report(data):
    """Run the coverage and variant reporting steps for one sample.

    ``data`` is threaded through ``cov.coverage``, ``cov.variants``,
    ``cov.priority_coverage`` and ``cov.priority_total_coverage`` in that
    order. If the result has a "coverage" entry, it is replaced by a dict
    holding the original value under 'all' and, under 'problems', the
    problem-region decorated version (or None when there is no problem
    region directory or no coverage value).

    Returns:
        ``[[data]]`` - the updated sample wrapped in a nested list.
    """
    pipeline = (
        cov.coverage,
        cov.variants,
        cov.priority_coverage,
        cov.priority_total_coverage,
    )
    for stage in pipeline:
        data = stage(data)

    problem_dir = dd.get_problem_region_dir(data)
    # The sample name is looked up as before, although nothing below uses it.
    dd.get_sample_name(data)

    if "coverage" not in data:
        return [[data]]

    all_coverage = data['coverage']
    decorated = (
        cov.decorate_problem_regions(all_coverage, problem_dir)
        if problem_dir and all_coverage
        else None
    )
    data['coverage'] = {'all': all_coverage, 'problems': decorated}
    return [[data]]
def _run_coverage_qc(bam_file, data, out_dir):
    """Run coverage QC analysis.

    Gathers read counts for ``bam_file`` via ``sambamba`` and derives
    percentage metrics (0-100 scale) from them, then triggers the priority
    and per-region coverage calculations in ``cov``.

    Args:
        bam_file: alignment file handed to the sambamba counting helpers.
        data: sample configuration passed through to every helper.
        out_dir: output directory handed to the ``cov`` helpers.

    Returns:
        dict mapping metric names to values. ``Total_reads`` and
        ``Mapped_reads`` are always present; ratio metrics are only added
        when their denominator is non-zero, and the target/coverage metrics
        are only computed when at least one read is mapped.
    """
    out = dict()

    total_reads = sambamba.number_of_reads(data, bam_file)
    out['Total_reads'] = total_reads
    mapped = sambamba.number_of_mapped_reads(data, bam_file)
    out['Mapped_reads'] = mapped
    if total_reads:
        out['Mapped_reads_pct'] = 100.0 * mapped / total_reads

    if mapped:
        mapped_unique = sambamba.number_of_mapped_reads(data, bam_file, keep_dups=False)
        # Store the duplicate-free count under this key; the raw mapped count
        # was being written here by mistake.
        out['Mapped_unique_reads'] = mapped_unique
        mapped_dups = mapped - mapped_unique
        out['Duplicates'] = mapped_dups
        out['Duplicates_pct'] = 100.0 * mapped_dups / mapped

        # The remaining metrics need mapped_unique, which is only bound when
        # reads are mapped, hence they live inside this branch.
        if dd.get_coverage(data):
            cov_bed_file = clean_file(dd.get_coverage(data), data, prefix="cov-", simple=True)
            merged_bed_file = bedutils.merge_overlaps(cov_bed_file, data)
            target_name = "coverage"
        else:
            merged_bed_file = dd.get_variant_regions_merged(data)
            target_name = "variant_regions"

        ontarget = sambamba.number_mapped_reads_on_target(
            data, merged_bed_file, bam_file, keep_dups=False, target_name=target_name)
        if mapped_unique:
            out["Ontarget_unique_reads"] = ontarget
            out["Ontarget_pct"] = 100.0 * ontarget / mapped_unique
            out['Offtarget_pct'] = 100.0 * (mapped_unique - ontarget) / mapped_unique
            # NOTE(review): 200 is presumably a padding in base pairs; confirm
            # against bedutils.get_padded_bed_file.
            padded_bed_file = bedutils.get_padded_bed_file(
                merged_bed_file, 200, data)
            ontarget_padded = sambamba.number_mapped_reads_on_target(
                data, padded_bed_file, bam_file, keep_dups=False,
                target_name=target_name + "_padded")
            out["Ontarget_padded_pct"] = 100.0 * ontarget_padded / mapped_unique
        if total_reads:
            out['Usable_pct'] = 100.0 * ontarget / total_reads

        avg_coverage = get_average_coverage(data, bam_file, merged_bed_file, target_name)
        out['Avg_coverage'] = avg_coverage

    # Called for their effects only; the results are not part of the
    # returned metrics.
    cov.priority_coverage(data, out_dir)
    cov.priority_total_coverage(data, out_dir)
    cov.coverage_region_detailed_stats(data, out_dir)
    # TODO: re-enable problem region decoration of the priority coverage once
    # annotations come from an internally installed problem region directory.

    return out
def _run_coverage_qc(bam_file, data, out_dir): """Run coverage QC analysis""" priority = cov.priority_coverage(data, out_dir) cov.priority_total_coverage(data, out_dir) coverage = cov.coverage(data, out_dir)