Beispiel #1
0
def run(items):
    """Perform detection of structural variations with Manta.
    """
    paired = vcfutils.get_paired(items)
    data = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(data)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(data, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "break-point-inspector" in dd.get_tools_on(data):
            if paired and paired.normal_bam and paired.tumor_name == dd.get_sample_name(data):
                variant_file = _run_break_point_inspector(data, variant_file, paired, work_dir)
        if "sv" not in data:
            data["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, data, items)
        vc = {"variantcaller": "manta",
              "do_upload": upload_counts[final_vcf] == 0,  # only upload a single file per batch
              "vrn_file": final_vcf}
        evidence_bam = _get_evidence_bam(work_dir, data)
        if evidence_bam:
            vc["read_evidence"] = evidence_bam
        data["sv"].append(vc)
        upload_counts[final_vcf] += 1
        out.append(data)
    return out
Beispiel #2
0
def run(items):
    """Perform detection of structural variations with Manta.
    """
    paired = vcfutils.get_paired(items)
    data = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(data)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(data, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(
        variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)
    out = []
    for data in items:
        if paired and paired.normal_bam and "break-point-inspector" in dd.get_tools_on(
                data):
            variant_file = _run_break_point_inspector(data, variant_file,
                                                      paired)
        if "sv" not in data:
            data["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, data, items)
        data["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf})
        out.append(data)
    return out
Beispiel #3
0
def run(items):
    """Perform detection of structural variations with lumpy.
    """
    paired = vcfutils.get_paired(items)
    work_dir = _sv_workdir(
        paired.tumor_data if paired and paired.tumor_data else items[0])
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for data in items:
        full_bams.append(dd.get_align_bam(data))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(data)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(data, work_dir)
        previous_evidence[dd.get_sample_name(data)] = {}
        if cur_dels and utils.file_exists(cur_dels):
            previous_evidence[dd.get_sample_name(data)]["dels"] = cur_dels
        if cur_dups and utils.file_exists(cur_dups):
            previous_evidence[dd.get_sample_name(data)]["dups"] = cur_dups
    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams,
                                          work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)
    gt_vcfs = {}
    # Retain paired samples with tumor/normal genotyped in one file
    if paired and paired.normal_name:
        batches = [[paired.tumor_data, paired.normal_data]]
    else:
        batches = [[x] for x in items]

    for batch_items in batches:
        for data in batch_items:
            gt_vcfs[dd.get_sample_name(data)] = _filter_by_support(
                lumpy_vcf, data)
    if paired and paired.normal_name:
        gt_vcfs = _filter_by_background(paired.tumor_name,
                                        [paired.normal_name], gt_vcfs,
                                        paired.tumor_data)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        vcf_file = gt_vcfs.get(dd.get_sample_name(data))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, data, "snpeff")
            data["sv"].append({
                "variantcaller": "lumpy",
                "vrn_file": effects_vcf or vcf_file,
                "do_upload": upload_counts[vcf_file] ==
                0,  # only upload a single file per batch
                "exclude_file": exclude_file
            })
            upload_counts[vcf_file] += 1
        out.append(data)
    return out
Beispiel #4
0
def run(items):
    """Perform detection of structural variations with lumpy.
    """
    paired = vcfutils.get_paired(items)
    work_dir = _sv_workdir(paired.tumor_data if paired and paired.tumor_data else items[0])
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for data in items:
        full_bams.append(dd.get_align_bam(data))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(data)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(data, work_dir)
        previous_evidence[dd.get_sample_name(data)] = {}
        if cur_dels and utils.file_exists(cur_dels):
            previous_evidence[dd.get_sample_name(data)]["dels"] = cur_dels
        if cur_dups and utils.file_exists(cur_dups):
            previous_evidence[dd.get_sample_name(data)]["dups"] = cur_dups
    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)
    gt_vcfs = {}
    # Retain paired samples with tumor/normal genotyped in one file
    if paired and paired.normal_name:
        batches = [[paired.tumor_data, paired.normal_data]]
    else:
        batches = [[x] for x in items]

    for batch_items in batches:
        for data in batch_items:
            gt_vcfs[dd.get_sample_name(data)] = _filter_by_support(lumpy_vcf, data)
    if paired and paired.normal_name:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name], gt_vcfs, paired.tumor_data)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        vcf_file = gt_vcfs.get(dd.get_sample_name(data))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, data, "snpeff")
            data["sv"].append({"variantcaller": "lumpy",
                               "vrn_file": effects_vcf or vcf_file,
                               "do_upload": upload_counts[vcf_file] == 0,  # only upload a single file per batch
                               "exclude_file": exclude_file})
            upload_counts[vcf_file] += 1
        out.append(data)
    return out
Beispiel #5
0
def _add_variantcalls_to_output(out, data, items, is_somatic=False):
    """Call ploidy and convert into VCF and BED representations.
    """
    call_file = "%s-call%s" % os.path.splitext(out["cns"])
    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            filters = ["--filter", "cn"]
            cmd = [os.path.join(os.path.dirname(sys.executable), "cnvkit.py"), "call"] + \
                  filters + \
                   ["--ploidy", str(ploidy.get_ploidy([data])),
                    "-o", tx_call_file, out["cns"]]
            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(out["cns"]):
                cmd += [
                    "--vcf", small_vrn_files[0].name, "--sample-id",
                    small_vrn_files[0].sample
                ]
                if small_vrn_files[0].normal:
                    cmd += ["--normal-id", small_vrn_files[0].normal]
                if not is_somatic:
                    cmd += ["-m", "clonal"]
            gender = _get_batch_gender(items)
            if gender:
                cmd += ["--sample-sex", gender]
            do.run(cmd, "CNVkit call ploidy")
    calls = {}
    for outformat in ["bed", "vcf"]:
        out_file = "%s.%s" % (os.path.splitext(call_file)[0], outformat)
        calls[outformat] = out_file
        if not os.path.exists(out_file):
            with file_transaction(data, out_file) as tx_out_file:
                cmd = [
                    os.path.join(os.path.dirname(sys.executable), "cnvkit.py"),
                    "export", outformat, "--sample-id",
                    dd.get_sample_name(data), "--ploidy",
                    str(ploidy.get_ploidy([data])), "-o", tx_out_file,
                    call_file
                ]
                do.run(cmd, "CNVkit export %s" % outformat)
    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    out["vrn_file"] = effects_vcf or calls["vcf"]
    out["vrn_file"] = shared.annotate_with_depth(out["vrn_file"], items)
    return out
Beispiel #6
0
def run(items):
    """Perform detection of structural variations with Manta.
    """
    paired = vcfutils.get_paired(items)
    data = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(data)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(data, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(
        variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "break-point-inspector" in dd.get_tools_on(data):
            if paired and paired.normal_bam and paired.tumor_name == dd.get_sample_name(
                    data):
                variant_file = _run_break_point_inspector(
                    data, variant_file, paired, work_dir)
        if "sv" not in data:
            data["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, data, items)
        vc = {
            "variantcaller": "manta",
            "do_upload": upload_counts[final_vcf] ==
            0,  # only upload a single file per batch
            "vrn_file": final_vcf
        }
        evidence_bam = _get_evidence_bam(work_dir, data)
        if evidence_bam:
            vc["read_evidence"] = evidence_bam
        data["sv"].append(vc)
        upload_counts[final_vcf] += 1
        out.append(data)
    return out
Beispiel #7
0
def _add_variantcalls_to_output(out, data, items, is_somatic=False):
    """Call ploidy and convert into VCF and BED representations.
    """
    call_file = "%s-call%s" % os.path.splitext(out["cns"])
    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            filters = ["--filter", "cn"]
            cmd = [os.path.join(os.path.dirname(sys.executable), "cnvkit.py"), "call"] + \
                  filters + \
                   ["--ploidy", str(ploidy.get_ploidy([data])),
                    "-o", tx_call_file, out["cns"]]
            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(out["cns"]):
                cmd += ["--vcf", small_vrn_files[0].name, "--sample-id", small_vrn_files[0].sample]
                if small_vrn_files[0].normal:
                    cmd += ["--normal-id", small_vrn_files[0].normal]
                if not is_somatic:
                    cmd += ["-m", "clonal"]
            gender = _get_batch_gender(items)
            if gender:
                cmd += ["--sample-sex", gender]
            do.run(cmd, "CNVkit call ploidy")
    calls = {}
    for outformat in ["bed", "vcf"]:
        out_file = "%s.%s" % (os.path.splitext(call_file)[0], outformat)
        calls[outformat] = out_file
        if not os.path.exists(out_file):
            with file_transaction(data, out_file) as tx_out_file:
                cmd = [os.path.join(os.path.dirname(sys.executable), "cnvkit.py"), "export",
                       outformat, "--sample-id", dd.get_sample_name(data),
                       "--ploidy", str(ploidy.get_ploidy([data])),
                       "-o", tx_out_file, call_file]
                do.run(cmd, "CNVkit export %s" % outformat)
    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    out["vrn_file"] = effects_vcf or calls["vcf"]
    out["vrn_file"] = shared.annotate_with_depth(out["vrn_file"], items)
    return out