def get_analysis_intervals(data, vrn_file, base_dir):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in a fixed priority order and the first
    truthy one is returned:

    1. regions derived from ``vrn_file`` via ``_callable_from_gvcf`` (only
       when ``vrn_file`` is truthy and "gvcf" is among the enabled tools),
    2. ``data["ensemble_bed"]``,
    3. ``dd.get_callable_regions(data)``,
    4. the first element of ``callable.sample_callable_bed`` computed from
       ``align_bam``, ``work_bam`` or ``work_bam_callable`` (in that order),
    5. ``config -> algorithm -> callable_regions``,
    6. ``config -> algorithm -> variant_regions``.

    Returns None when no source yields a value.
    """
    # gVCF-derived regions win, but only when a variant file was supplied.
    if vrn_file and "gvcf" in dd.get_tools_on(data):
        gvcf_regions = _callable_from_gvcf(data, vrn_file, base_dir)
        if gvcf_regions:
            return gvcf_regions

    ensemble_regions = data.get("ensemble_bed")
    if ensemble_regions:
        return ensemble_regions

    precomputed_regions = dd.get_callable_regions(data)
    if precomputed_regions:
        return precomputed_regions

    # Fall back to computing regions from the first BAM that is present.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        if data.get(bam_key):
            sample_result = callable.sample_callable_bed(
                data[bam_key], dd.get_ref_file(data), data)
            return sample_result[0]

    # Last resort: regions specified directly in the algorithm configuration.
    for config_key in ("callable_regions", "variant_regions"):
        configured_regions = tz.get_in(["config", "algorithm", config_key], data)
        if configured_regions:
            return configured_regions
    return None
def get_analysis_intervals(data):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in a fixed priority order and the first
    truthy one is returned:

    1. ``data["ensemble_bed"]``,
    2. ``dd.get_callable_regions(data)``,
    3. the result of ``callable.sample_callable_bed`` computed from
       ``align_bam``, ``work_bam`` or ``work_bam_callable`` (in that order),
    4. ``config -> algorithm -> callable_regions``,
    5. ``config -> algorithm -> variant_regions``.

    Returns None when no source yields a value.
    """
    ensemble_regions = data.get("ensemble_bed")
    if ensemble_regions:
        return ensemble_regions

    precomputed_regions = dd.get_callable_regions(data)
    if precomputed_regions:
        return precomputed_regions

    # Fall back to computing regions from the first BAM that is present.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        if data.get(bam_key):
            return callable.sample_callable_bed(
                data[bam_key], dd.get_ref_file(data), data)

    # Last resort: regions specified directly in the algorithm configuration.
    for config_key in ("callable_regions", "variant_regions"):
        configured_regions = tz.get_in(["config", "algorithm", config_key], data)
        if configured_regions:
            return configured_regions
    return None
def _evaluate_vcf(calls, truth_vcf, work_dir, data):
    """Write per-caller validation statistics to a CSV file in ``work_dir``.

    The output is named ``<sample name>-sv-validate.csv``. If that file
    already exists it is reused untouched. Otherwise a header row is written,
    followed by every row produced by ``_validate_caller_vcf`` for each entry
    in ``calls`` (each entry supplies ``vrn_file`` and ``variantcaller``).

    Returns the path to the CSV file.
    """
    csv_name = "%s-sv-validate.csv" % dd.get_sample_name(data)
    out_file = os.path.join(work_dir, csv_name)
    if utils.file_exists(out_file):
        return out_file

    header = ["sample", "caller", "vtype", "metric", "value"]
    # Write into the transactional path; file_transaction is responsible for
    # putting the result in place at out_file.
    with file_transaction(data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for call in calls:
                caller_stats = _validate_caller_vcf(
                    call["vrn_file"], truth_vcf,
                    dd.get_callable_regions(data),
                    call["variantcaller"], data)
                for stats in caller_stats:
                    writer.writerow(stats)
    return out_file
def get_analysis_intervals(data, vrn_file, base_dir):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in a fixed priority order and the first
    truthy one is returned:

    1. regions derived from ``vrn_file`` via ``_callable_from_gvcf`` (only
       when ``vrn_file`` is truthy and "gvcf" is among the enabled tools),
    2. ``data["ensemble_bed"]``,
    3. ``dd.get_callable_regions(data)``,
    4. the result of ``callable.sample_callable_bed`` computed from
       ``align_bam``, ``work_bam`` or ``work_bam_callable`` (in that order),
    5. ``config -> algorithm -> callable_regions``,
    6. ``config -> algorithm -> variant_regions``.

    Returns None when no source yields a value.
    """
    # gVCF-derived regions win, but only when a variant file was supplied.
    if vrn_file and "gvcf" in dd.get_tools_on(data):
        gvcf_regions = _callable_from_gvcf(data, vrn_file, base_dir)
        if gvcf_regions:
            return gvcf_regions

    ensemble_regions = data.get("ensemble_bed")
    if ensemble_regions:
        return ensemble_regions

    precomputed_regions = dd.get_callable_regions(data)
    if precomputed_regions:
        return precomputed_regions

    # Fall back to computing regions from the first BAM that is present.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        if data.get(bam_key):
            return callable.sample_callable_bed(
                data[bam_key], dd.get_ref_file(data), data)

    # Last resort: regions specified directly in the algorithm configuration.
    for config_key in ("callable_regions", "variant_regions"):
        configured_regions = tz.get_in(["config", "algorithm", config_key], data)
        if configured_regions:
            return configured_regions
    return None
def _evaluate_vcf(calls, truth_vcf, work_dir, data):
    """Write per-caller validation statistics to a CSV file in ``work_dir``.

    The output is named ``<sample name>-sv-validate.csv``. An existing file
    is returned as-is without recomputation. Otherwise the file is created
    with a header row and then one row per statistic yielded by
    ``_validate_caller_vcf`` for each entry in ``calls`` (each entry supplies
    ``vrn_file`` and ``variantcaller``).

    Returns the path to the CSV file.
    """
    sample_name = dd.get_sample_name(data)
    out_file = os.path.join(work_dir, "%s-sv-validate.csv" % sample_name)
    if utils.file_exists(out_file):
        return out_file

    # Write into the transactional path; file_transaction is responsible for
    # putting the result in place at out_file.
    with file_transaction(data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(["sample", "caller", "vtype", "metric", "value"])
            for call in calls:
                writer.writerows(
                    _validate_caller_vcf(
                        call["vrn_file"], truth_vcf,
                        dd.get_callable_regions(data),
                        call["variantcaller"], data))
    return out_file
def _run_purecn_dx(out, paired):
    """Extract copy number and mutational metrics from a PureCN rds file.

    Runs ``PureCN_Dx.R`` on ``out["rds"]`` unless ``out["mutation_burden"]``
    is already up to date relative to that rds file. The command writes to a
    transactional output base; every expected file that exists next to it is
    then moved to the directory of the final output base.

    Returns the ``out`` dictionary produced by ``_get_purecn_dx_files``.
    """
    out_base, out, all_files = _get_purecn_dx_files(paired, out)
    if utils.file_uptodate(out["mutation_burden"], out["rds"]):
        return out

    final_dir = os.path.dirname(out_base)
    with file_transaction(paired.tumor_data, out_base) as tx_out_base:
        callable_regions = dd.get_callable_regions(paired.tumor_data)
        cmd = ["PureCN_Dx.R",
               "--rds", out["rds"],
               "--callable", callable_regions,
               "--signatures",
               "--out", tx_out_base]
        do.run(cmd, "PureCN Dx mutational burden and signatures")
        # Move outputs while still inside the transaction, since they live
        # alongside the transactional output base.
        tx_dir = os.path.dirname(tx_out_base)
        for fname in all_files:
            produced = os.path.join(tx_dir, fname)
            if os.path.exists(produced):
                shutil.move(produced, os.path.join(final_dir, fname))
    return out