def run(items):
    """Perform detection of structural variations with Manta.

    The Manta workflow is prepared and executed only when the expected output
    file is not already present. The resulting VCF is annotated with depth and
    finalized per sample; every sample in ``items`` gets a ``manta`` entry
    appended to its ``sv`` list.

    Returns the samples from ``items`` in their original order.
    """
    paired = vcfutils.get_paired(items)
    # For paired batches the tumor sample is used for the work directory
    # and for the file transaction.
    lead_sample = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead_sample)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(lead_sample, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)

    times_reported = collections.defaultdict(int)
    out = []
    for sample in items:
        # Break-point-inspector only runs on the tumor sample of a pair that
        # has a normal BAM; its output replaces variant_file for this and any
        # following samples in the loop.
        if ("break-point-inspector" in dd.get_tools_on(sample)
                and paired and paired.normal_bam
                and paired.tumor_name == dd.get_sample_name(sample)):
            variant_file = _run_break_point_inspector(sample, variant_file, paired, work_dir)
        if "sv" not in sample:
            sample["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        call_info = {
            "variantcaller": "manta",
            # only upload a single file per batch
            "do_upload": times_reported[final_vcf] == 0,
            "vrn_file": final_vcf,
        }
        evidence_bam = _get_evidence_bam(work_dir, sample)
        if evidence_bam:
            call_info["read_evidence"] = evidence_bam
        sample["sv"].append(call_info)
        times_reported[final_vcf] += 1
        out.append(sample)
    return out
def run(items):
    """Perform detection of structural variations with Manta.

    Runs the Manta workflow when its output file is missing, annotates the
    VCF with depth, then appends a ``manta`` entry (caller name and finalized
    VCF path) to the ``sv`` list of each sample.

    Returns the samples from ``items`` in their original order.
    """
    paired = vcfutils.get_paired(items)
    if paired:
        primary = paired.tumor_data
    else:
        primary = items[0]
    work_dir = _sv_workdir(primary)
    variant_file = _get_out_file(work_dir, paired)

    if not utils.file_exists(variant_file):
        with file_transaction(primary, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), \
        "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)

    out = []
    for sample in items:
        # NOTE(review): this runs for every sample of a pair (not only the
        # tumor) and each result is fed into the next iteration -- confirm
        # that repeated inspection is intended.
        if (paired and paired.normal_bam
                and "break-point-inspector" in dd.get_tools_on(sample)):
            variant_file = _run_break_point_inspector(sample, variant_file, paired)
        if "sv" not in sample:
            sample["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        sample["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf})
        out.append(sample)
    return out
def run(items):
    """Perform detection of structural variations with lumpy.

    Collects the aligned, split-read and discordant BAMs of every sample,
    runs smoove on them together and annotates the VCF with depth. The VCF is
    then filtered by support per sample and, for tumor/normal pairs, by the
    normal background. Samples with a resulting VCF get a ``lumpy`` entry
    appended to their ``sv`` list.

    Returns the samples from ``items`` in their original order.
    """
    paired = vcfutils.get_paired(items)
    if paired and paired.tumor_data:
        base_sample = paired.tumor_data
    else:
        base_sample = items[0]
    work_dir = _sv_workdir(base_sample)

    # NOTE(review): previous_evidence is filled in below but never read
    # again inside this function.
    previous_evidence = {}
    full_bams = []
    sr_bams = []
    disc_bams = []
    for sample in items:
        full_bams.append(dd.get_align_bam(sample))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(sample)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(sample, work_dir)
        sample_evidence = {}
        previous_evidence[dd.get_sample_name(sample)] = sample_evidence
        for kind, bedpe in (("dels", cur_dels), ("dups", cur_dups)):
            if bedpe and utils.file_exists(bedpe):
                sample_evidence[kind] = bedpe

    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)

    # Retain paired samples with tumor/normal genotyped in one file
    has_normal = paired and paired.normal_name
    if has_normal:
        to_filter = [paired.tumor_data, paired.normal_data]
    else:
        to_filter = items
    gt_vcfs = {}
    for sample in to_filter:
        supported_vcf = _filter_by_support(lumpy_vcf, sample)
        gt_vcfs[dd.get_sample_name(sample)] = supported_vcf
    if has_normal:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name],
                                        gt_vcfs, paired.tumor_data)

    times_reported = collections.defaultdict(int)
    out = []
    for sample in items:
        if "sv" not in sample:
            sample["sv"] = []
        vcf_file = gt_vcfs.get(dd.get_sample_name(sample))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, sample, "snpeff")
            sample["sv"].append({
                "variantcaller": "lumpy",
                "vrn_file": effects_vcf or vcf_file,
                # only upload a single file per batch
                "do_upload": times_reported[vcf_file] == 0,
                "exclude_file": exclude_file,
            })
            times_reported[vcf_file] += 1
        out.append(sample)
    return out
def run(items):
    """Perform detection of structural variations with lumpy.

    Gathers per-sample BAM inputs, calls smoove once for all samples, adds
    depth annotation, and derives one support-filtered VCF per sample (also
    filtered against the normal when a tumor/normal pair is present). Each
    sample with a VCF receives a ``lumpy`` record in its ``sv`` list; only the
    first record for a given VCF path is flagged ``do_upload``.

    Returns the samples from ``items`` in their original order.
    """
    paired = vcfutils.get_paired(items)
    work_sample = items[0]
    if paired and paired.tumor_data:
        work_sample = paired.tumor_data
    work_dir = _sv_workdir(work_sample)

    # NOTE(review): previous_evidence is populated here but is not consumed
    # anywhere later in this function.
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for cur in items:
        full_bams.append(dd.get_align_bam(cur))
        split_bam, discordant_bam = sshared.find_existing_split_discordants(cur)
        sr_bams.append(split_bam)
        disc_bams.append(discordant_bam)
        dels_bedpe, dups_bedpe = _bedpes_from_cnv_caller(cur, work_dir)
        found = previous_evidence[dd.get_sample_name(cur)] = {}
        if dels_bedpe and utils.file_exists(dels_bedpe):
            found["dels"] = dels_bedpe
        if dups_bedpe and utils.file_exists(dups_bedpe):
            found["dups"] = dups_bedpe

    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)

    # Retain paired samples with tumor/normal genotyped in one file
    with_normal = paired and paired.normal_name
    genotype_samples = [paired.tumor_data, paired.normal_data] if with_normal else items
    gt_vcfs = {}
    for cur in genotype_samples:
        filtered = _filter_by_support(lumpy_vcf, cur)
        gt_vcfs[dd.get_sample_name(cur)] = filtered
    if with_normal:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name],
                                        gt_vcfs, paired.tumor_data)

    already_uploaded = set()
    out = []
    for cur in items:
        if "sv" not in cur:
            cur["sv"] = []
        vcf_file = gt_vcfs.get(dd.get_sample_name(cur))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, cur, "snpeff")
            record = {
                "variantcaller": "lumpy",
                "vrn_file": effects_vcf or vcf_file,
                # only upload a single file per batch
                "do_upload": vcf_file not in already_uploaded,
                "exclude_file": exclude_file,
            }
            cur["sv"].append(record)
            already_uploaded.add(vcf_file)
        out.append(cur)
    return out
def _add_variantcalls_to_output(out, data, items, is_somatic=False):
    """Call ploidy and convert into VCF and BED representations.

    Runs ``cnvkit.py call`` on ``out["cns"]`` (skipped when the call file
    already exists), then ``cnvkit.py export`` to BED and VCF (each skipped
    when its output already exists). Adds ``call_file``, ``vrn_bed`` and
    ``vrn_file`` to ``out`` and returns ``out``.
    """
    cns_base, cns_ext = os.path.splitext(out["cns"])
    call_file = "%s-call%s" % (cns_base, cns_ext)
    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            cnvkit = os.path.join(os.path.dirname(sys.executable), "cnvkit.py")
            cmd = [cnvkit, "call", "--filter", "cn",
                   "--ploidy", str(ploidy.get_ploidy([data])),
                   "-o", tx_call_file, out["cns"]]
            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(out["cns"]):
                # Only the first compatible small variant file is passed on.
                vrn = small_vrn_files[0]
                cmd.extend(["--vcf", vrn.name, "--sample-id", vrn.sample])
                if vrn.normal:
                    cmd.extend(["--normal-id", vrn.normal])
            if not is_somatic:
                cmd.extend(["-m", "clonal"])
            gender = _get_batch_gender(items)
            if gender:
                cmd.extend(["--sample-sex", gender])
            do.run(cmd, "CNVkit call ploidy")

    call_base = os.path.splitext(call_file)[0]
    calls = {}
    for outformat in ("bed", "vcf"):
        export_file = "%s.%s" % (call_base, outformat)
        calls[outformat] = export_file
        if os.path.exists(export_file):
            continue
        with file_transaction(data, export_file) as tx_out_file:
            cnvkit = os.path.join(os.path.dirname(sys.executable), "cnvkit.py")
            cmd = [cnvkit, "export", outformat,
                   "--sample-id", dd.get_sample_name(data),
                   "--ploidy", str(ploidy.get_ploidy([data])),
                   "-o", tx_out_file, call_file]
            do.run(cmd, "CNVkit export %s" % outformat)

    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    # Use the exported VCF when add_to_vcf gives back nothing truthy.
    out["vrn_file"] = shared.annotate_with_depth(effects_vcf or calls["vcf"], items)
    return out
def run(items):
    """Perform detection of structural variations with Manta.

    Executes the Manta workflow unless its output VCF already exists, then
    annotates the VCF with depth and finalizes it for each sample. A ``manta``
    record is appended to every sample's ``sv`` list; ``do_upload`` is set
    only on the first record produced for a given final VCF.

    Returns the samples from ``items`` in their original order.
    """
    paired = vcfutils.get_paired(items)
    if paired:
        ref_sample = paired.tumor_data
    else:
        ref_sample = items[0]
    work_dir = _sv_workdir(ref_sample)
    variant_file = _get_out_file(work_dir, paired)

    need_run = not utils.file_exists(variant_file)
    if need_run:
        with file_transaction(ref_sample, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow, tx_work_dir)
    assert utils.file_exists(variant_file), \
        "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)

    out = []
    uploaded = set()
    for cur in items:
        if "break-point-inspector" in dd.get_tools_on(cur):
            # Restricted to the tumor sample of a pair with a normal BAM.
            has_normal_bam = paired and paired.normal_bam
            if has_normal_bam and paired.tumor_name == dd.get_sample_name(cur):
                variant_file = _run_break_point_inspector(
                    cur, variant_file, paired, work_dir)
        if "sv" not in cur:
            cur["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, cur, items)
        vc = {
            "variantcaller": "manta",
            # only upload a single file per batch
            "do_upload": final_vcf not in uploaded,
            "vrn_file": final_vcf,
        }
        evidence_bam = _get_evidence_bam(work_dir, cur)
        if evidence_bam:
            vc["read_evidence"] = evidence_bam
        cur["sv"].append(vc)
        uploaded.add(final_vcf)
        out.append(cur)
    return out
def _add_variantcalls_to_output(out, data, items, is_somatic=False):
    """Call ploidy and convert into VCF and BED representations.

    The ``cnvkit.py call`` step writes ``<cns base>-call<ext>`` next to
    ``out["cns"]``; the ``cnvkit.py export`` step writes ``.bed`` and ``.vcf``
    files sharing the call file's base name. Existing outputs are reused.
    ``out`` is updated with ``call_file``, ``vrn_bed`` and ``vrn_file`` and
    returned.
    """
    cns_file = out["cns"]
    call_file = "%s-call%s" % os.path.splitext(cns_file)

    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            cnvkit_py = os.path.join(os.path.dirname(sys.executable), "cnvkit.py")
            ploidy_arg = str(ploidy.get_ploidy([data]))
            cmd = [cnvkit_py, "call"]
            cmd += ["--filter", "cn"]
            cmd += ["--ploidy", ploidy_arg, "-o", tx_call_file, cns_file]
            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(out["cns"]):
                # Only the first compatible small variant file is used.
                first_vrn = small_vrn_files[0]
                cmd += ["--vcf", first_vrn.name]
                cmd += ["--sample-id", first_vrn.sample]
                if first_vrn.normal:
                    cmd += ["--normal-id", first_vrn.normal]
            if not is_somatic:
                cmd += ["-m", "clonal"]
            gender = _get_batch_gender(items)
            if gender:
                cmd += ["--sample-sex", gender]
            do.run(cmd, "CNVkit call ploidy")

    calls = {}
    for outformat in ["bed", "vcf"]:
        target = "%s.%s" % (os.path.splitext(call_file)[0], outformat)
        calls[outformat] = target
        if not os.path.exists(target):
            with file_transaction(data, target) as tx_out_file:
                cnvkit_py = os.path.join(os.path.dirname(sys.executable), "cnvkit.py")
                cmd = [cnvkit_py, "export", outformat]
                cmd += ["--sample-id", dd.get_sample_name(data)]
                cmd += ["--ploidy", str(ploidy.get_ploidy([data]))]
                cmd += ["-o", tx_out_file, call_file]
                do.run(cmd, "CNVkit export %s" % outformat)

    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    if effects_vcf:
        out["vrn_file"] = effects_vcf
    else:
        # Keep the exported VCF when add_to_vcf gives back nothing truthy.
        out["vrn_file"] = calls["vcf"]
    out["vrn_file"] = shared.annotate_with_depth(out["vrn_file"], items)
    return out