def _run_coverage_qc(bam_file, data, out_dir):
    """Run the coverage QC steps for a sample.

    Calls, in order, the priority-region coverage, the priority total
    coverage and the general coverage steps from ``cov``, then decorates the
    priority coverage with problem regions when both are available.

    ``bam_file`` is accepted for interface compatibility but is not read here.
    The results of the individual steps are not returned; the function always
    returns ``None``.
    """
    priority_cov = cov.priority_coverage(data, out_dir)
    cov.priority_total_coverage(data, out_dir)
    cov.coverage(data, out_dir)
    problem_dir = dd.get_problem_region_dir(data)
    if not (problem_dir and priority_cov):
        return None
    cov.decorate_problem_regions(priority_cov, problem_dir)
    return None
def summary(items):
    """Build the per-batch chanjo coverage database and attach it to samples.

    Args:
        items: sample ``data`` dictionaries belonging to one batch. The first
            item supplies the shared settings (coverage cutoff, work
            directory, coverage/priority BED files, problem-region directory).

    Returns:
        A list with one ``[data]`` entry per sample. When the database file
        exists, each sample's ``data["coverage"]`` is set to a dict holding the
        database path (``"summary"``) and the region coverage (``"all"``).
        If the database does not exist yet and neither a coverage nor a
        priority BED is configured, ``items`` is returned unchanged.

    Raises:
        AssertionError: if no batch name is found for ``items``.
    """
    first = items[0]
    cutoff = dd.get_coverage_depth_min(first)
    work_dir = dd.get_work_dir(first)
    out_dir = utils.safe_makedir(os.path.join(work_dir, "coverage"))
    coverage_bed = dd.get_coverage_regions(first)
    priority_bed = dd.get_priority_regions(first)
    batch = _get_group_batch(items)
    assert batch, "Did not find batch for samples: %s" % ",".join(
        [dd.get_sample_name(x) for x in items])
    out_file = os.path.join(out_dir, "%s-coverage.db" % batch)

    if not utils.file_exists(out_file):
        # Minimize whichever region files are configured (coverage first).
        mini_beds = [bed.minimize(region_bed).fn
                     for region_bed in (coverage_bed, priority_bed)
                     if region_bed]
        if not mini_beds:
            # No coverage or priority file has been set.
            # NOTE(review): this returns the flat input list, while the normal
            # path returns one-element lists per sample -- kept as-is for
            # existing callers; confirm which shape they expect.
            return items
        if len(mini_beds) > 1:
            combined_bed = bed.concat(mini_beds).fn
        else:
            combined_bed = mini_beds[0]
        # combined_bed is already a file name here, so the former
        # ``len(combined_bed) > 0 ... else combined_bed.fn`` test was always
        # true and its fallback could only fail; clean unconditionally.
        clean_bed = bedutils.clean_file(combined_bed, first)
        bed_file = _uniquify_bed_names(clean_bed, out_dir, first)
        if bed_file:
            try:
                if utils.file_exists(bed_file):
                    with file_transaction(first, out_file) as tx_out_file:
                        chanjo = os.path.join(
                            os.path.dirname(sys.executable), "chanjo")
                        build_cmd = (
                            "{chanjo} --db {tx_out_file} build {bed_file}"
                        ).format(chanjo=chanjo, tx_out_file=tx_out_file,
                                 bed_file=bed_file)
                        do.run(build_cmd, "Prep chanjo database")
                        annotate_tmpl = (
                            "{chanjo} annotate -s {sample} -g {batch} -c {cutoff} "
                            "{bam_file} {bed_file} | "
                            "{chanjo} --db {tx_out_file} import"
                        )
                        # A distinct loop name keeps ``first`` intact for the
                        # problem-region lookup below.
                        for sample_data in items:
                            annotate_cmd = annotate_tmpl.format(
                                chanjo=chanjo,
                                sample=dd.get_sample_name(sample_data),
                                batch=batch,
                                cutoff=cutoff,
                                bam_file=sample_data["work_bam"],
                                bed_file=bed_file,
                                tx_out_file=tx_out_file)
                            do.run(annotate_cmd, "Chanjo coverage", sample_data)
            finally:
                # Remove the uniquified BED even when a chanjo step fails, and
                # do not crash when it was never written.
                if os.path.exists(bed_file):
                    os.remove(bed_file)

    coverage = regions_coverage(out_file, batch, out_dir)
    problem_regions = dd.get_problem_region_dir(first)
    if problem_regions:
        coverage = decorate_problem_regions(coverage, problem_regions)

    have_db = utils.file_exists(out_file)
    out = []
    for sample_data in items:
        if have_db:
            sample_data["coverage"] = {"summary": out_file, "all": coverage}
        out.append([sample_data])
    return out
def coverage_report(data):
    """Run the coverage and variants steps for one sample.

    The two ``cov`` steps are run one after the other on ``data``; any
    parallelism has to come from how the caller dispatches this function.
    If the result carries a ``"coverage"`` entry, it is replaced by a dict
    with the original value under ``"all"`` and, under ``"problems"``, the
    value decorated with problem regions (``None`` when no problem-region
    directory is configured or the coverage value is empty).

    Args:
        data: sample dictionary passed through ``cov.coverage`` and
            ``cov.variants``.

    Returns:
        ``[[data]]`` with the updated sample dictionary.
    """
    data = cov.coverage(data)
    data = cov.variants(data)
    # Samples for which the coverage step produced nothing are passed through
    # instead of failing with a KeyError.
    if "coverage" not in data:
        return [[data]]
    coverage = data["coverage"]
    problem_regions = dd.get_problem_region_dir(data)
    annotated = None
    if problem_regions and coverage:
        annotated = decorate_problem_regions(coverage, problem_regions)
    data["coverage"] = {"all": coverage, "problems": annotated}
    return [[data]]
def summary(items):
    """Build the per-batch chanjo coverage database and attach it to samples.

    Args:
        items: sample ``data`` dictionaries belonging to one batch. The first
            item supplies the shared settings (coverage cutoff, work
            directory, coverage/priority BED files, problem-region directory).

    Returns:
        A list with one ``[data]`` entry per sample. When the database file
        exists, each sample's ``data["coverage"]`` is set to a dict holding the
        database path (``"summary"``) and the region coverage (``"all"``).

    Raises:
        AssertionError: if no batch name is found for ``items``.
    """
    first = items[0]
    cutoff = dd.get_coverage_depth_min(first)
    work_dir = dd.get_work_dir(first)
    out_dir = utils.safe_makedir(os.path.join(work_dir, "coverage"))
    coverage_bed = dd.get_coverage_regions(first)
    priority_bed = dd.get_priority_regions(first)
    batch = _get_group_batch(items)
    assert batch, ("Did not find batch for samples: %s" %
                   ",".join([dd.get_sample_name(x) for x in items]))
    out_file = os.path.join(out_dir, "%s-coverage.db" % batch)

    if not utils.file_exists(out_file):
        # NOTE(review): either BED may be unset; this relies on bed.concat
        # coping with that -- confirm.
        combined_bed = bed.concat([coverage_bed, priority_bed])
        if len(combined_bed) > 0:
            clean_bed = bedutils.clean_file(combined_bed.fn, first)
        else:
            clean_bed = combined_bed.fn
        bed_file = _uniquify_bed_names(clean_bed, out_dir, first)
        if utils.file_exists(bed_file):
            try:
                with file_transaction(first, out_file) as tx_out_file:
                    chanjo = os.path.join(
                        os.path.dirname(sys.executable), "chanjo")
                    build_cmd = (
                        "{chanjo} --db {tx_out_file} build {bed_file}"
                    ).format(chanjo=chanjo, tx_out_file=tx_out_file,
                             bed_file=bed_file)
                    do.run(build_cmd, "Prep chanjo database")
                    annotate_tmpl = (
                        "{chanjo} annotate -s {sample} -g {batch} -c {cutoff} "
                        "{bam_file} {bed_file} | "
                        "{chanjo} --db {tx_out_file} import")
                    # A distinct loop name keeps ``first`` intact for the
                    # problem-region lookup below.
                    for sample_data in items:
                        annotate_cmd = annotate_tmpl.format(
                            chanjo=chanjo,
                            sample=dd.get_sample_name(sample_data),
                            batch=batch,
                            cutoff=cutoff,
                            bam_file=sample_data["work_bam"],
                            bed_file=bed_file,
                            tx_out_file=tx_out_file)
                        do.run(annotate_cmd, "Chanjo coverage", sample_data)
            finally:
                # Remove the uniquified BED even when a chanjo step fails;
                # only a file that is actually present is deleted.
                if os.path.exists(bed_file):
                    os.remove(bed_file)

    coverage = regions_coverage(out_file, batch, out_dir)
    problem_regions = dd.get_problem_region_dir(first)
    if problem_regions:
        coverage = decorate_problem_regions(coverage, problem_regions)

    have_db = utils.file_exists(out_file)
    out = []
    for sample_data in items:
        if have_db:
            sample_data["coverage"] = {"summary": out_file, "all": coverage}
        out.append([sample_data])
    return out
def summary(items):
    """Build the per-batch chanjo coverage database and attach it to samples.

    Args:
        items: sample ``data`` dictionaries belonging to one batch. The first
            item supplies the shared settings (work directory,
            coverage/priority BED files, problem-region directory).

    Returns:
        A list with one ``[data]`` entry per sample. When the database file
        exists, each sample's ``data["coverage"]`` is set to a dict holding the
        database path (``"summary"``) and the incomplete regions
        (``"incomplete"``).

    Raises:
        AssertionError: if no batch name is found for ``items``.
    """
    cutoff = DEFAULT_COVERAGE_CUTOFF
    first = items[0]
    work_dir = dd.get_work_dir(first)
    out_dir = utils.safe_makedir(os.path.join(work_dir, "coverage"))
    coverage_bed = dd.get_coverage_regions(first)
    priority_bed = dd.get_priority_regions(first)
    combined_bed = bed.concat([coverage_bed, priority_bed])
    if len(combined_bed) > 0:
        clean_bed = bedutils.clean_file(combined_bed.fn, first)
    else:
        clean_bed = combined_bed.fn
    bed_file = _uniquify_bed_names(clean_bed, out_dir, first)
    try:
        logger.info("THE BED FILE %s", bed_file)
        batch = _get_group_batch(items)
        assert batch, ("Did not find batch for samples: %s" %
                       ",".join([dd.get_sample_name(x) for x in items]))
        out_file = os.path.join(out_dir, "%s-coverage.db" % batch)
        if not utils.file_exists(out_file) and utils.file_exists(bed_file):
            with file_transaction(first, out_file) as tx_out_file:
                chanjo = os.path.join(
                    os.path.dirname(sys.executable), "chanjo")
                build_cmd = (
                    "{chanjo} --db {tx_out_file} build {bed_file}"
                ).format(chanjo=chanjo, tx_out_file=tx_out_file,
                         bed_file=bed_file)
                do.run(build_cmd, "Prep chanjo database")
                annotate_tmpl = (
                    "{chanjo} annotate -s {sample} -g {batch} -c {cutoff} "
                    "{bam_file} {bed_file} | "
                    "{chanjo} --db {tx_out_file} import")
                # A distinct loop name keeps ``first`` intact for the
                # problem-region lookup below.
                for sample_data in items:
                    annotate_cmd = annotate_tmpl.format(
                        chanjo=chanjo,
                        sample=dd.get_sample_name(sample_data),
                        batch=batch,
                        cutoff=cutoff,
                        bam_file=sample_data["work_bam"],
                        bed_file=bed_file,
                        tx_out_file=tx_out_file)
                    do.run(annotate_cmd, "Chanjo coverage", sample_data)

        incomplete = incomplete_regions(out_file, batch, out_dir)
        problem_regions = dd.get_problem_region_dir(first)
        if problem_regions:
            incomplete = decorate_problem_regions(incomplete, problem_regions)

        have_db = utils.file_exists(out_file)
        out = []
        for sample_data in items:
            if have_db:
                sample_data["coverage"] = {"summary": out_file,
                                           "incomplete": incomplete}
            out.append([sample_data])
    finally:
        # Remove the uniquified BED whether or not the steps above succeeded,
        # and do not crash when it was never written.
        if bed_file and os.path.exists(bed_file):
            os.remove(bed_file)
    return out