def run(items): """Perform detection of structural variations with Manta. """ paired = vcfutils.get_paired(items) data = paired.tumor_data if paired else items[0] work_dir = _sv_workdir(data) variant_file = _get_out_file(work_dir, paired) if not utils.file_exists(variant_file): with file_transaction(data, work_dir) as tx_work_dir: utils.safe_makedir(tx_work_dir) tx_workflow_file = _prep_config(items, paired, tx_work_dir) _run_workflow(items, paired, tx_workflow_file, tx_work_dir) assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file variant_file = shared.annotate_with_depth(variant_file, items) out = [] upload_counts = collections.defaultdict(int) for data in items: if "break-point-inspector" in dd.get_tools_on(data): if paired and paired.normal_bam and paired.tumor_name == dd.get_sample_name(data): variant_file = _run_break_point_inspector(data, variant_file, paired, work_dir) if "sv" not in data: data["sv"] = [] final_vcf = shared.finalize_sv(variant_file, data, items) vc = {"variantcaller": "manta", "do_upload": upload_counts[final_vcf] == 0, # only upload a single file per batch "vrn_file": final_vcf} evidence_bam = _get_evidence_bam(work_dir, data) if evidence_bam: vc["read_evidence"] = evidence_bam data["sv"].append(vc) upload_counts[final_vcf] += 1 out.append(data) return out
def run(items): """Perform detection of structural variations with Manta. """ paired = vcfutils.get_paired(items) data = paired.tumor_data if paired else items[0] work_dir = _sv_workdir(data) variant_file = _get_out_file(work_dir, paired) if not utils.file_exists(variant_file): with file_transaction(data, work_dir) as tx_work_dir: utils.safe_makedir(tx_work_dir) tx_workflow_file = _prep_config(items, paired, tx_work_dir) _run_workflow(items, paired, tx_workflow_file, tx_work_dir) assert utils.file_exists( variant_file), "Manta finished without output file %s" % variant_file out = [] for data in items: if paired and paired.normal_bam and "break-point-inspector" in dd.get_tools_on( data): variant_file = _run_break_point_inspector(data, variant_file, paired) if "sv" not in data: data["sv"] = [] final_vcf = shared.finalize_sv(variant_file, data, items) data["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf}) out.append(data) return out
def run(items): """Perform detection of structural variations with delly. Performs post-call filtering with a custom filter tuned based on NA12878 Moleculo and PacBio data, using calls prepared by @ryanlayer and @cc2qe Filters using the high quality variant pairs (DV) compared with high quality reference pairs (DR). """ work_dir = utils.safe_makedir( os.path.join(items[0]["dirs"]["work"], "structural", items[0]["name"][-1], "delly")) # Add core request for delly config = copy.deepcopy(items[0]["config"]) delly_config = utils.get_in(config, ("resources", "delly"), {}) delly_config["cores"] = 1 config["resources"]["delly"] = delly_config parallel = { "type": "local", "cores": config["algorithm"].get("num_cores", 1), "progs": ["delly"] } work_bams = [dd.get_align_bam(d) for d in items] ref_file = dd.get_ref_file(items[0]) sv_types = [ "DEL", "DUP" ] # "TRA" has invalid VCF END specifications that GATK doesn't like, "INV" very slow exclude_file = _get_full_exclude_file(items, work_dir) bytype_vcfs = run_multicore( _run_delly, [(work_bams, chrom, sv_type, ref_file, work_dir, items) for (chrom, sv_type) in itertools.product( sshared.get_sv_chroms(items, exclude_file), sv_types)], config, parallel) out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs) combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config) out = [] for data in items: if "sv" not in data: data["sv"] = [] base, ext = utils.splitext_plus(combo_vcf) final_vcf = sshared.finalize_sv(combo_vcf, data, items) if final_vcf: delly_vcf = _delly_count_evidence_filter(final_vcf, data) data["sv"].append({ "variantcaller": "delly", "vrn_file": delly_vcf, "exclude": exclude_file }) out.append(data) return out
def run(items): """Perform detection of structural variations with delly. Performs post-call filtering with a custom filter tuned based on NA12878 Moleculo and PacBio data, using calls prepared by @ryanlayer and @cc2qe Filters using the high quality variant pairs (DV) compared with high quality reference pairs (DR). """ work_dir = utils.safe_makedir( os.path.join(items[0]["dirs"]["work"], "structural", dd.get_sample_name(items[0]), "delly")) # Add core request for delly config = copy.deepcopy(items[0]["config"]) delly_config = utils.get_in(config, ("resources", "delly"), {}) delly_config["cores"] = 1 config["resources"]["delly"] = delly_config parallel = { "type": "local", "cores": config["algorithm"].get("num_cores", 1), "progs": ["delly"] } work_bams = [dd.get_align_bam(d) for d in items] ref_file = dd.get_ref_file(items[0]) exclude_file = _get_full_exclude_file(items, work_bams, work_dir) bytype_vcfs = run_multicore( _run_delly, [(work_bams, chrom, ref_file, work_dir, items) for chrom in sshared.get_sv_chroms(items, exclude_file)], config, parallel) out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs) combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config) out = [] upload_counts = collections.defaultdict(int) for data in items: if "sv" not in data: data["sv"] = [] base, ext = utils.splitext_plus(combo_vcf) final_vcf = sshared.finalize_sv(combo_vcf, data, items) if final_vcf: delly_vcf = _delly_count_evidence_filter(final_vcf, data) data["sv"].append({ "variantcaller": "delly", "vrn_file": delly_vcf, "do_upload": upload_counts[final_vcf] == 0, # only upload a single file per batch "exclude": exclude_file }) upload_counts[final_vcf] += 1 out.append(data) return out
def run(items): """Perform detection of structural variations with delly. Performs post-call filtering with a custom filter tuned based on NA12878 Moleculo and PacBio data, using calls prepared by @ryanlayer and @cc2qe Filters using the high quality variant pairs (DV) compared with high quality reference pairs (DR). """ work_dir = utils.safe_makedir(os.path.join(items[0]["dirs"]["work"], "structural", dd.get_sample_name(items[0]), "delly")) # Add core request for delly config = copy.deepcopy(items[0]["config"]) delly_config = utils.get_in(config, ("resources", "delly"), {}) delly_config["cores"] = 1 config["resources"]["delly"] = delly_config parallel = {"type": "local", "cores": config["algorithm"].get("num_cores", 1), "progs": ["delly"]} work_bams = [dd.get_align_bam(d) for d in items] ref_file = dd.get_ref_file(items[0]) exclude_file = _get_full_exclude_file(items, work_bams, work_dir) bytype_vcfs = run_multicore(_run_delly, [(work_bams, chrom, ref_file, work_dir, items) for chrom in sshared.get_sv_chroms(items, exclude_file)], config, parallel) out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs) combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config) out = [] upload_counts = collections.defaultdict(int) for data in items: if "sv" not in data: data["sv"] = [] base, ext = utils.splitext_plus(combo_vcf) final_vcf = sshared.finalize_sv(combo_vcf, data, items) if final_vcf: delly_vcf = _delly_count_evidence_filter(final_vcf, data) data["sv"].append({"variantcaller": "delly", "vrn_file": delly_vcf, "do_upload": upload_counts[final_vcf] == 0, # only upload a single file per batch "exclude": exclude_file}) upload_counts[final_vcf] += 1 out.append(data) return out
def run(items): """Perform detection of structural variations with delly. Performs post-call filtering with a custom filter tuned based on NA12878 Moleculo and PacBio data, using calls prepared by @ryanlayer and @cc2qe Filters using the high quality variant pairs (DV) compared with high quality reference pairs (DR). """ work_dir = utils.safe_makedir(os.path.join(items[0]["dirs"]["work"], "structural", items[0]["name"][-1], "delly")) # Add core request for delly config = copy.deepcopy(items[0]["config"]) delly_config = utils.get_in(config, ("resources", "delly"), {}) delly_config["cores"] = 1 config["resources"]["delly"] = delly_config parallel = {"type": "local", "cores": config["algorithm"].get("num_cores", 1), "progs": ["delly"]} work_bams = [dd.get_align_bam(d) for d in items] ref_file = dd.get_ref_file(items[0]) sv_types = ["DEL", "DUP"] # "TRA" has invalid VCF END specifications that GATK doesn't like, "INV" very slow exclude_file = _get_full_exclude_file(items, work_dir) bytype_vcfs = run_multicore(_run_delly, [(work_bams, chrom, sv_type, ref_file, work_dir, items) for (chrom, sv_type) in itertools.product(sshared.get_sv_chroms(items, exclude_file), sv_types)], config, parallel) out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs) combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config) out = [] for data in items: if "sv" not in data: data["sv"] = [] base, ext = utils.splitext_plus(combo_vcf) final_vcf = sshared.finalize_sv(combo_vcf, data, items) if final_vcf: delly_vcf = _delly_count_evidence_filter(final_vcf, data) data["sv"].append({"variantcaller": "delly", "vrn_file": delly_vcf, "exclude": exclude_file}) out.append(data) return out
def run(items): """Perform detection of structural variations with Manta. """ paired = vcfutils.get_paired(items) data = paired.tumor_data if paired else items[0] work_dir = _sv_workdir(data) variant_file = _get_out_file(work_dir, paired) if not utils.file_exists(variant_file): with file_transaction(data, work_dir) as tx_work_dir: utils.safe_makedir(tx_work_dir) tx_workflow_file = _prep_config(items, paired, tx_work_dir) _run_workflow(items, paired, tx_workflow_file, tx_work_dir) assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file out = [] for data in items: if "sv" not in data: data["sv"] = [] final_vcf = shared.finalize_sv(variant_file, data, items) data["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf}) out.append(data) return out
def run(items): """Perform detection of structural variations with Manta. """ paired = vcfutils.get_paired(items) data = paired.tumor_data if paired else items[0] work_dir = _sv_workdir(data) variant_file = _get_out_file(work_dir, paired) if not utils.file_exists(variant_file): with file_transaction(data, work_dir) as tx_work_dir: utils.safe_makedir(tx_work_dir) tx_workflow_file = _prep_config(items, paired, tx_work_dir) _run_workflow(items, paired, tx_workflow_file, tx_work_dir) assert utils.file_exists( variant_file), "Manta finished without output file %s" % variant_file variant_file = shared.annotate_with_depth(variant_file, items) out = [] upload_counts = collections.defaultdict(int) for data in items: if "break-point-inspector" in dd.get_tools_on(data): if paired and paired.normal_bam and paired.tumor_name == dd.get_sample_name( data): variant_file = _run_break_point_inspector( data, variant_file, paired, work_dir) if "sv" not in data: data["sv"] = [] final_vcf = shared.finalize_sv(variant_file, data, items) vc = { "variantcaller": "manta", "do_upload": upload_counts[final_vcf] == 0, # only upload a single file per batch "vrn_file": final_vcf } evidence_bam = _get_evidence_bam(work_dir, data) if evidence_bam: vc["read_evidence"] = evidence_bam data["sv"].append(vc) upload_counts[final_vcf] += 1 out.append(data) return out
def run(items, background=None): """Detect copy number variations from batched set of samples using WHAM. """ if not background: background = [] background_bams = [] paired = vcfutils.get_paired_bams([x["align_bam"] for x in items], items) if paired: inputs = [paired.tumor_data] if paired.normal_bam: background = [paired.normal_data] background_bams = [paired.normal_bam] else: assert not background inputs, background = shared.find_case_control(items) background_bams = [x["align_bam"] for x in background] orig_vcf = _run_wham(inputs, background_bams) out = [] for data in inputs: if "sv" not in data: data["sv"] = [] final_vcf = shared.finalize_sv(orig_vcf, data, items) data["sv"].append({"variantcaller": "wham", "vrn_file": final_vcf}) out.append(data) return out