Ejemplo n.º 1
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Select and run the FreeBayes caller matching the sample layout.

    Three routes are possible:
      - not a paired analysis: paired-configuration problems are checked,
        then the standard caller runs;
      - paired analysis whose pair has no normal BAM: the standard caller
        runs with the pair passed as ``somatic``;
      - paired analysis with a normal BAM: the paired caller runs.

    Returns whatever the selected caller returns.
    """
    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    pair = get_paired_bams(align_bams, items)
    if pair.normal_bam:
        return _run_freebayes_paired(align_bams, items, ref_file,
                                     assoc_files, region, out_file)
    return _run_freebayes_caller(align_bams, items, ref_file, assoc_files,
                                 region, out_file, somatic=pair)
Ejemplo n.º 2
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target dispatching structural variant calling.

    The caller is looked up from the ``svcaller`` algorithm setting of the
    first item. When no caller is configured or available for ``stage`` the
    inputs are passed through unchanged. Callers listed in
    ``_NEEDS_BACKGROUND`` additionally receive the samples from ``all_items``
    whose names are not in ``items``, unless this is a paired analysis.

    Returns a list of single-element lists, one per output sample. For inputs
    carrying ``cwl_keys`` a non-empty ``sv`` list is replaced by its only
    element.
    """
    items = [utils.to_single_data(item) for item in items]
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage).get(svcaller)
    if not svcaller or not caller_fn:
        # Nothing to run: pass the inputs straight through
        results = items
    elif (all_items and svcaller in _NEEDS_BACKGROUND
          and not vcfutils.is_paired_analysis(
              [item.get("align_bam") for item in items], items)):
        batch_names = {dd.get_sample_name(item) for item in items}
        background = [
            other for other in all_items
            if dd.get_sample_name(other) not in batch_names
        ]
        results = caller_fn(items, background)
    else:
        results = caller_fn(items)
    out = [[result] for result in results]
    if "cwl_keys" not in items[0]:
        return out
    # Avoid nesting of callers for CWL runs for easier extraction
    unpacked = [utils.to_single_data(wrapped) for wrapped in out]
    out_cwl = []
    for data in unpacked:
        svs = data.get("sv")
        if svs:
            assert len(svs) == 1, svs
            data["sv"] = svs[0]
        out_cwl.append([data])
    return out_cwl
Ejemplo n.º 3
0
def detect_sv(items, all_items, config):
    """Top level parallel target dispatching structural variant calling.

    The active caller is read from ``config["algorithm"]["svcaller_active"]``:
      - not set: ``items`` is returned as a single group, untouched;
      - a single-sample caller (``_CALLERS``): exactly one item is expected
        and its ``sv`` key is set to the caller result;
      - a batch caller (``_BATCH_CALLERS``): run on all items, with
        background samples from ``all_items`` added when the caller is in
        ``_NEEDS_BACKGROUND`` and this is not a paired analysis.

    Raises ValueError for a caller name found in neither registry.
    """
    svcaller = config["algorithm"].get("svcaller_active")
    if not svcaller:
        return [items]

    if svcaller in _CALLERS:
        assert len(items) == 1
        data = items[0]
        data["sv"] = _CALLERS[svcaller](data)
        return [[data]]

    if svcaller not in _BATCH_CALLERS:
        raise ValueError("Unexpected structural variant caller: %s" % svcaller)

    wants_background = (
        svcaller in _NEEDS_BACKGROUND
        and not vcfutils.is_paired_analysis([x.get("align_bam") for x in items], items))
    if wants_background:
        batch_names = {tz.get_in(["rgnames", "sample"], x) for x in items}
        background = [x for x in all_items
                      if tz.get_in(["rgnames", "sample"], x) not in batch_names]
        calls = _BATCH_CALLERS[svcaller](items, background)
    else:
        calls = _BATCH_CALLERS[svcaller](items)
    return [[svdata] for svdata in calls]
Ejemplo n.º 4
0
def detect_sv(items, all_items, config):
    """Run the active structural variant caller on a group of samples.

    ``config["algorithm"]["svcaller_active"]`` selects the caller. Without
    one, ``items`` is returned as a single group. Single-sample callers
    (``_CALLERS``) store their result under the ``sv`` key of the only item.
    Batch callers (``_BATCH_CALLERS``) yield one output entry per returned
    sample and receive non-batch samples from ``all_items`` as background when
    listed in ``_NEEDS_BACKGROUND`` outside of a paired analysis.

    Raises ValueError when the caller name is in neither registry.
    """
    active = config["algorithm"].get("svcaller_active")
    out = []
    if not active:
        out.append(items)
    elif active in _CALLERS:
        assert len(items) == 1
        sample = items[0]
        sample["sv"] = _CALLERS[active](sample)
        out.append([sample])
    elif active in _BATCH_CALLERS:
        if (active in _NEEDS_BACKGROUND
                and not vcfutils.is_paired_analysis(
                    [s.get("align_bam") for s in items], items)):
            in_batch = {tz.get_in(["rgnames", "sample"], s) for s in items}
            background = [
                s for s in all_items
                if tz.get_in(["rgnames", "sample"], s) not in in_batch
            ]
            calls = _BATCH_CALLERS[active](items, background)
        else:
            calls = _BATCH_CALLERS[active](items)
        out.extend([svdata] for svdata in calls)
    else:
        raise ValueError("Unexpected structural variant caller: %s" %
                         active)
    return out
Ejemplo n.º 5
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Select and run the FreeBayes caller matching the sample layout.

    ``items`` is first passed through
    ``shared.add_highdepth_genome_exclusion``. After that:
      - not a paired analysis: paired-configuration problems are checked,
        then the standard caller runs;
      - paired analysis whose pair has no normal BAM: the standard caller
        runs with the pair passed as ``somatic``;
      - paired analysis with a normal BAM: the paired caller runs on the
        tumor and normal BAMs/data, in that order.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    pair = get_paired_bams(align_bams, items)
    if not pair.normal_bam:
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file,
                                     somatic=pair)
    # Tumor always first so the paired caller sees a fixed ordering
    pair_bams = [pair.tumor_bam, pair.normal_bam]
    pair_data = [pair.tumor_data, pair.normal_data]
    return _run_freebayes_paired(pair_bams, pair_data, ref_file, assoc_files,
                                 region, out_file)
Ejemplo n.º 6
0
def shared_variantcall(call_fn, name, align_bams, ref_file, items,
                       assoc_files, region=None, out_file=None):
    """Wrap a variant caller with shared output naming, regions and annotation.

    When ``file_exists(out_file)`` is false, ``call_fn`` is invoked inside a
    file transaction as
    ``call_fn(align_bams, ref_file, items, target_regions, tx_out_file)``.
    The exception is a region subset that resolves to a string which is not
    an existing file; an empty VCF is written in that case. Outputs ending in
    ``.gz`` go through ``vcfutils.bgzip_and_index``.

    Returns the result of ``annotation.annotate_nongatk_vcf`` on the output.
    """
    config = items[0]["config"]
    if out_file is None:
        # NOTE(review): the "metdata" key looks like a misspelling of
        # "metadata"; kept as-is because the config schema is not visible
        # here -- confirm against callers before changing.
        out_file = (
            "%s-paired-variants.vcf.gz" % config["metdata"]["batch"]
            if vcfutils.is_paired_analysis(align_bams, items)
            else "%s-variants.vcf.gz" % os.path.splitext(align_bams[0])[0])
    if not file_exists(out_file):
        first_bam_name = os.path.basename(align_bams[0])
        logger.debug("Genotyping with {name}: {region} {fname}".format(
            name=name, region=region, fname=first_bam_name))
        variant_regions = bedutils.merge_overlaps(
            bedutils.population_variant_regions(items), items[0])
        target_regions = subset_variant_regions(variant_regions, region, out_file)
        # String target that does not exist as a file: skip calling and emit
        # an empty VCF instead.
        missing_target_file = (variant_regions is not None
                               and isinstance(target_regions, basestring)
                               and not os.path.isfile(target_regions))
        if missing_target_file:
            vcfutils.write_empty_vcf(out_file, config)
        else:
            with file_transaction(config, out_file) as tx_out_file:
                call_fn(align_bams, ref_file, items, target_regions,
                        tx_out_file)
    if out_file.endswith(".gz"):
        out_file = vcfutils.bgzip_and_index(out_file, config)
    return annotation.annotate_nongatk_vcf(out_file, align_bams,
                                           assoc_files.get("dbsnp"),
                                           ref_file, config)
Ejemplo n.º 7
0
def shared_variantcall(call_fn, name, align_bams, ref_file, items,
                       assoc_files, region=None, out_file=None):
    """Wrap a variant caller with shared indexing, regions and annotation.

    When ``file_exists(out_file)`` is false, every input BAM is indexed and
    ``call_fn`` is invoked inside a file transaction as
    ``call_fn(align_bams, ref_file, items, target_regions, tx_out_file)``.
    An empty VCF is written instead when the region subset resolves to a
    string that is not an existing file, or when any BAM fails
    ``realign.has_aligned_reads`` for ``region``.

    Returns the result of ``annotation.annotate_nongatk_vcf`` on the output.
    """
    config = items[0]["config"]
    if out_file is None:
        # NOTE(review): the "metdata" key looks like a misspelling of
        # "metadata"; kept as-is because the config schema is not visible
        # here -- confirm against callers before changing.
        out_file = (
            "%s-paired-variants.vcf" % config["metdata"]["batch"]
            if vcfutils.is_paired_analysis(align_bams, items)
            else "%s-variants.vcf" % os.path.splitext(align_bams[0])[0])
    if not file_exists(out_file):
        first_bam_name = os.path.basename(align_bams[0])
        logger.info("Genotyping with {name}: {region} {fname}".format(
            name=name, region=region, fname=first_bam_name))
        for bam_file in align_bams:
            bam.index(bam_file, config)
        variant_regions = config["algorithm"].get("variant_regions")
        target_regions = subset_variant_regions(variant_regions, region, out_file)
        missing_target_file = (variant_regions is not None
                               and isinstance(target_regions, basestring)
                               and not os.path.isfile(target_regions))
        # The read check only runs when the target file check did not already
        # decide on an empty output.
        if missing_target_file or any(
                not realign.has_aligned_reads(bam_file, region)
                for bam_file in align_bams):
            vcfutils.write_empty_vcf(out_file)
        else:
            with file_transaction(out_file) as tx_out_file:
                call_fn(align_bams, ref_file, items, target_regions,
                        tx_out_file)
    return annotation.annotate_nongatk_vcf(out_file, align_bams,
                                           assoc_files["dbsnp"],
                                           ref_file, config)
Ejemplo n.º 8
0
def shared_variantcall(call_fn, name, align_bams, ref_file, items,
                       assoc_files, region=None, out_file=None):
    """Provide shared output naming, region subsetting and annotation.

    When ``file_exists(out_file)`` is false, ``call_fn`` is invoked inside a
    file transaction as
    ``call_fn(align_bams, ref_file, items, target_regions, tx_out_file)``.
    The exception is a region subset that resolves to a string which is not
    an existing file; an empty VCF is written in that case. Outputs ending in
    ``.gz`` go through ``vcfutils.bgzip_and_index``.

    Returns the result of ``annotation.annotate_nongatk_vcf`` on the output.
    """
    config = items[0]["config"]
    if out_file is None:
        # NOTE(review): the "metdata" key looks like a misspelling of
        # "metadata"; kept as-is because the config schema is not visible
        # here -- confirm against callers before changing.
        if not vcfutils.is_paired_analysis(align_bams, items):
            bam_base = os.path.splitext(align_bams[0])[0]
            out_file = "%s-variants.vcf.gz" % bam_base
        else:
            out_file = "%s-paired-variants.vcf.gz" % config["metdata"]["batch"]
    if not file_exists(out_file):
        log_msg = "Genotyping with {name}: {region} {fname}".format(
            name=name, region=region, fname=os.path.basename(align_bams[0]))
        logger.debug(log_msg)
        population_regions = bedutils.population_variant_regions(items)
        variant_regions = bedutils.merge_overlaps(population_regions, items[0])
        target_regions = subset_variant_regions(variant_regions, region,
                                                out_file, items=items)
        # String target that does not exist as a file: skip calling and emit
        # an empty VCF instead.
        have_callable_target = not (
            variant_regions is not None
            and isinstance(target_regions, basestring)
            and not os.path.isfile(target_regions))
        if have_callable_target:
            with file_transaction(config, out_file) as tx_out_file:
                call_fn(align_bams, ref_file, items, target_regions,
                        tx_out_file)
        else:
            vcfutils.write_empty_vcf(out_file, config)
    if out_file.endswith(".gz"):
        out_file = vcfutils.bgzip_and_index(out_file, config)
    dbsnp = assoc_files.get("dbsnp")
    return annotation.annotate_nongatk_vcf(out_file, align_bams, dbsnp,
                                           ref_file, config)
Ejemplo n.º 9
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Run FreeBayes with the paired caller for paired analyses, else the standard caller.

    Returns whatever the selected caller returns.
    """
    caller = (_run_freebayes_paired if is_paired_analysis(align_bams, items)
              else _run_freebayes_caller)
    return caller(align_bams, items, ref_file, assoc_files, region, out_file)
Ejemplo n.º 10
0
def run_vardict(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Run VarDict with the paired caller for paired analyses, else the standard caller.

    Outside of a paired analysis, ``vcfutils.check_paired_problems`` is run
    on ``items`` before calling. Returns whatever the selected caller returns.
    """
    if vcfutils.is_paired_analysis(align_bams, items):
        return _run_vardict_paired(align_bams, items, ref_file, assoc_files, region, out_file)
    vcfutils.check_paired_problems(items)
    return _run_vardict_caller(align_bams, items, ref_file, assoc_files, region, out_file)
Ejemplo n.º 11
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Dispatch FreeBayes calling to the paired tumor/normal or germline caller.

    Returns whatever the selected caller returns.
    """
    if not is_paired_analysis(align_bams, items):
        return _run_freebayes_caller(align_bams, items, ref_file, assoc_files, region, out_file)
    return _run_freebayes_paired(align_bams, items, ref_file, assoc_files, region, out_file)
Ejemplo n.º 12
0
def run_scalpel(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Dispatch Scalpel indel calling to the paired or standard caller.

    Returns whatever the selected caller returns.

    Raises ValueError when ``region`` is None.
    """
    if region is None:
        raise ValueError("A region must be provided for Scalpel")
    caller = (_run_scalpel_paired if is_paired_analysis(align_bams, items)
              else _run_scalpel_caller)
    return caller(align_bams, items, ref_file, assoc_files, region, out_file)
Ejemplo n.º 13
0
def run(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Dispatch strelka2 calling to the somatic (paired) or germline runner.

    The somatic route asserts that the pair carries a normal BAM.
    Returns whatever the selected runner returns.
    """
    if not vcfutils.is_paired_analysis(align_bams, items):
        return _run_germline(align_bams, items, ref_file, assoc_files,
                             region, out_file)
    pair = vcfutils.get_paired_bams(align_bams, items)
    assert pair.normal_bam, "Strelka2 requires a normal sample"
    return _run_somatic(pair, ref_file, assoc_files, region, out_file)
Ejemplo n.º 14
0
def _pick_lead_item(items):
    """Choose the sample that batch calls get attached to.

    For a paired analysis this is the first item whose paired phenotype is
    "tumor"; otherwise it is the first item.

    Raises ValueError for a paired analysis without any tumor sample.
    """
    bam_files = [item["align_bam"] for item in items]
    if not vcfutils.is_paired_analysis(bam_files, items):
        return items[0]
    # Private sentinel so the "no tumor" case cannot collide with an item
    not_found = object()
    lead = next((item for item in items
                 if vcfutils.get_paired_phenotype(item) == "tumor"),
                not_found)
    if lead is not_found:
        raise ValueError("Did not find tumor sample in paired tumor/normal calling")
    return lead
Ejemplo n.º 15
0
def run(align_bams, items, ref_file, assoc_files, region, out_file):
    """Run DeepVariant germline calling on a single sample.

    region can be a single region or list of multiple regions for multicore calling.

    Asserts that the input is not a paired analysis and holds exactly one
    item, then calls on the first BAM and item.
    """
    def _sample_names():
        # Only evaluated when an assertion fails and needs its message
        return ", ".join([dd.get_sample_name(d) for d in items])

    assert not vcfutils.is_paired_analysis(align_bams, items), (
        "DeepVariant currently only supports germline calling: %s" %
        _sample_names())
    assert len(items) == 1, (
        "DeepVariant currently only supports single sample calling: %s" %
        _sample_names())
    lead_bam, lead_item = align_bams[0], items[0]
    return _run_germline(lead_bam, lead_item, ref_file, region, out_file)
Ejemplo n.º 16
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes using the paired caller when the inputs form a paired analysis.

    All other inputs go to the standard caller. Returns whatever the selected
    caller returns.
    """
    call_args = (align_bams, items, ref_file, assoc_files, region, out_file)
    if is_paired_analysis(align_bams, items):
        return _run_freebayes_paired(*call_args)
    return _run_freebayes_caller(*call_args)
Ejemplo n.º 17
0
def run_vardict(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Dispatch VarDict calling to the paired or standard caller.

    The standard route first runs ``vcfutils.check_paired_problems`` on
    ``items``. Returns whatever the selected caller returns.
    """
    call_args = (align_bams, items, ref_file, assoc_files, region, out_file)
    if not vcfutils.is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_vardict_caller(*call_args)
    return _run_vardict_paired(*call_args)
Ejemplo n.º 18
0
def _pick_lead_item(items):
    """Return the representative sample that batch calls are attached to.

    Outside of a paired analysis this is simply the first item. For a paired
    analysis it is the first item with a "tumor" paired phenotype.

    Raises ValueError when a paired analysis contains no tumor sample.
    """
    paired = vcfutils.is_paired_analysis([d["align_bam"] for d in items], items)
    if not paired:
        return items[0]
    for candidate in items:
        phenotype = vcfutils.get_paired_phenotype(candidate)
        if phenotype == "tumor":
            return candidate
    raise ValueError("Did not find tumor sample in paired tumor/normal calling")
Ejemplo n.º 19
0
def run_varscan(align_bams, items, ref_file, assoc_files,
                region=None, out_file=None):
    """Run VarScan through ``samtools.shared_variantcall``.

    The worker handed to the shared wrapper is ``_varscan_paired`` for a
    paired analysis and ``_varscan_work`` otherwise. Returns the wrapper's
    result.
    """
    worker = (_varscan_paired if is_paired_analysis(align_bams, items)
              else _varscan_work)
    return samtools.shared_variantcall(worker, "varscan", align_bams,
                                       ref_file, items, assoc_files,
                                       region, out_file)
Ejemplo n.º 20
0
def run(align_bams, items, ref_file, assoc_files, region, out_file):
    """Run strelka2 variant calling via the somatic or germline runner.

    region can be a single region or list of multiple regions for multicore calling.

    A paired analysis uses the somatic runner and asserts that the pair has a
    normal BAM; everything else uses the germline runner.
    """
    paired_analysis = vcfutils.is_paired_analysis(align_bams, items)
    if not paired_analysis:
        return _run_germline(align_bams, items, ref_file,
                             assoc_files, region, out_file)
    tumor_normal = vcfutils.get_paired_bams(align_bams, items)
    assert tumor_normal.normal_bam, "Strelka2 requires a normal sample"
    return _run_somatic(tumor_normal, ref_file, assoc_files, region, out_file)
Ejemplo n.º 21
0
def run_scalpel(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Run Scalpel indel calling with the paired or the standard caller.

    Returns whatever the selected caller returns.

    Raises ValueError when no ``region`` is given.
    """
    if region is None:
        raise ValueError("A region must be provided for Scalpel")
    call_args = (align_bams, items, ref_file, assoc_files, region, out_file)
    if not is_paired_analysis(align_bams, items):
        return _run_scalpel_caller(*call_args)
    return _run_scalpel_paired(*call_args)
Ejemplo n.º 22
0
def run(align_bams, items, ref_file, assoc_files, region, out_file):
    """Run DeepVariant germline calling on one sample and bgzip/index the result.

    region can be a single region or list of multiple regions for multicore calling.

    Asserts that the input is not a paired analysis and holds exactly one
    item. Returns the result of ``vcfutils.bgzip_and_index`` on the call file.
    """
    def _sample_names():
        # Only evaluated when an assertion fails and needs its message
        return ", ".join([dd.get_sample_name(d) for d in items])

    assert not vcfutils.is_paired_analysis(align_bams, items), (
        "DeepVariant currently only supports germline calling: %s" %
        _sample_names())
    assert len(items) == 1, (
        "DeepVariant currently only supports single sample calling: %s" %
        _sample_names())
    lead_bam, lead_item = align_bams[0], items[0]
    raw_calls = _run_germline(lead_bam, lead_item, ref_file, region, out_file)
    return vcfutils.bgzip_and_index(raw_calls, items[0]["config"])
Ejemplo n.º 23
0
def run_varscan(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Call variants with VarScan via the shared samtools wrapper.

    A paired analysis hands ``_varscan_paired`` to
    ``samtools.shared_variantcall``; any other input hands it
    ``_varscan_work``. Returns the wrapper's result.
    """
    if is_paired_analysis(align_bams, items):
        worker = _varscan_paired
    else:
        worker = _varscan_work
    return samtools.shared_variantcall(worker, "varscan", align_bams,
                                       ref_file, items, assoc_files, region,
                                       out_file)
Ejemplo n.º 24
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target dispatching structural variant calling.

    items = sample-sv_caller list, from one batch

    The caller is looked up from the ``svcaller`` algorithm setting of the
    first item. Nothing is run (an empty list is returned) for the
    "pon_build" batch when "purecn" is among the configured callers. Callers
    listed in ``_NEEDS_BACKGROUND`` additionally receive the samples from
    ``all_items`` whose names are not in ``items``, unless this is a paired
    analysis. Without a usable caller the inputs are passed through.

    Returns a list of single-element lists. For CWL runs each output is also
    validated, gets an ``svvalidate`` summary, has its ``sv`` value unnested
    (or set to an empty dict) and is passed through ``_add_supplemental``.
    """
    items = [utils.to_single_data(item) for item in items]
    items = cwlutils.unpack_tarballs(items, items[0])
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage, special_cases=True).get(svcaller)
    # no SV calling when just creating a PON for PureCN
    if dd.get_batch(items[0]) == "pon_build" and "purecn" in dd.get_svcaller(items[0]):
        return []
    if not svcaller or not caller_fn:
        results = items
    elif (all_items and svcaller in _NEEDS_BACKGROUND
          and not vcfutils.is_paired_analysis(
              [item.get("align_bam") for item in items], items)):
        batch_names = {dd.get_sample_name(item) for item in items}
        background = [
            other for other in all_items
            if dd.get_sample_name(other) not in batch_names
        ]
        results = caller_fn(items, background)
    else:
        results = caller_fn(items)
    out = [[result] for result in results]
    if not cwlutils.is_cwl_run(items[0]):
        return out
    # Avoid nesting of callers for CWL runs for easier extraction
    unpacked = [utils.to_single_data(wrapped) for wrapped in out]
    out_cwl = []
    for data in unpacked:
        # Run validation directly from CWL runs since we're single stage
        data = validate.evaluate(data)
        summary = tz.get_in(["sv-validate", "csv"], data)
        data["svvalidate"] = {"summary": summary}
        svs = data.get("sv")
        if not svs:
            data["sv"] = {}
        else:
            assert len(svs) == 1, svs
            data["sv"] = svs[0]
        out_cwl.append([_add_supplemental(data)])
    return out_cwl
Ejemplo n.º 25
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Run FreeBayes with the caller matching the sample layout.

    A paired analysis with a normal BAM uses the paired caller. A paired
    analysis without one uses the standard caller with the pair passed as
    ``somatic``. Any other input is checked with
    ``vcfutils.check_paired_problems`` and then sent to the standard caller.

    Returns whatever the selected caller returns.
    """
    call_args = (align_bams, items, ref_file, assoc_files, region, out_file)
    if is_paired_analysis(align_bams, items):
        pair = get_paired_bams(align_bams, items)
        if pair.normal_bam:
            return _run_freebayes_paired(*call_args)
        return _run_freebayes_caller(*call_args, somatic=pair)
    vcfutils.check_paired_problems(items)
    return _run_freebayes_caller(*call_args)
Ejemplo n.º 26
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target dispatching structural variant calling.

    The caller is looked up from the ``svcaller`` algorithm setting of the
    first item. Without a usable caller for ``stage`` the inputs are passed
    through. Callers listed in ``_NEEDS_BACKGROUND`` additionally receive the
    samples from ``all_items`` whose names are not in ``items``, unless this
    is a paired analysis.

    Returns a list of single-element lists, one per output sample.
    """
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage).get(svcaller)
    if not svcaller or not caller_fn:
        return [[data] for data in items]

    use_background = (
        all_items and svcaller in _NEEDS_BACKGROUND and
        not vcfutils.is_paired_analysis([d.get("align_bam") for d in items], items))
    if use_background:
        batch_names = {dd.get_sample_name(d) for d in items}
        background = [d for d in all_items if dd.get_sample_name(d) not in batch_names]
        sv_results = caller_fn(items, background)
    else:
        sv_results = caller_fn(items)
    return [[svdata] for svdata in sv_results]
Ejemplo n.º 27
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes with the caller matching the sample layout.

    ``items`` is first passed through
    ``shared.add_highdepth_genome_exclusion``. A paired analysis with a normal
    BAM runs the paired caller on the tumor and normal BAMs/data, in that
    order. A paired analysis without one runs the standard caller with the
    pair passed as ``somatic``. Any other input is checked with
    ``vcfutils.check_paired_problems`` and sent to the standard caller.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    if is_paired_analysis(align_bams, items):
        pair = get_paired_bams(align_bams, items)
        if pair.normal_bam:
            tumor_normal_bams = [pair.tumor_bam, pair.normal_bam]
            tumor_normal_data = [pair.tumor_data, pair.normal_data]
            return _run_freebayes_paired(tumor_normal_bams, tumor_normal_data,
                                         ref_file, assoc_files, region, out_file)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file, somatic=pair)
    vcfutils.check_paired_problems(items)
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file)
Ejemplo n.º 28
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target dispatching structural variant calling.

    The caller function comes from ``_CALLERS[stage]``, keyed by the
    ``svcaller`` algorithm setting of the first item. Without a usable caller
    the inputs are passed through. Callers listed in ``_NEEDS_BACKGROUND``
    additionally receive the samples from ``all_items`` whose names are not in
    ``items``, unless this is a paired analysis.

    Returns a list of single-element lists, one per output sample.
    """
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _CALLERS[stage].get(svcaller)
    if not (svcaller and caller_fn):
        sv_results = items
    elif (all_items and svcaller in _NEEDS_BACKGROUND and
            not vcfutils.is_paired_analysis([d.get("align_bam") for d in items], items)):
        in_batch = {dd.get_sample_name(d) for d in items}
        background = [d for d in all_items if dd.get_sample_name(d) not in in_batch]
        sv_results = caller_fn(items, background)
    else:
        sv_results = caller_fn(items)
    return [[entry] for entry in sv_results]
Ejemplo n.º 29
0
def detect_sv(items, all_items, config, stage):
    """Top level parallel target dispatching structural variant calling.

    The caller function comes from ``_CALLERS[stage]``, keyed by
    ``config["algorithm"]["svcaller_active"]``. Without a usable caller the
    inputs are passed through. Callers listed in ``_NEEDS_BACKGROUND``
    additionally receive the samples from ``all_items`` whose sample names are
    not in ``items``, unless this is a paired analysis.

    Returns a list of single-element lists, one per output sample.
    """
    svcaller = config["algorithm"].get("svcaller_active")
    caller_fn = _CALLERS[stage].get(svcaller)
    if not svcaller or not caller_fn:
        return [[data] for data in items]

    sample_key = ["rgnames", "sample"]
    if (svcaller in _NEEDS_BACKGROUND and
            not vcfutils.is_paired_analysis([d.get("align_bam") for d in items], items)):
        in_batch = {tz.get_in(sample_key, d) for d in items}
        background = [d for d in all_items if tz.get_in(sample_key, d) not in in_batch]
        sv_results = caller_fn(items, background)
    else:
        sv_results = caller_fn(items)
    return [[svdata] for svdata in sv_results]
Ejemplo n.º 30
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target dispatching structural variant calling.

    Inputs are normalised to single data items and passed through
    ``cwlutils.unpack_tarballs``. The caller is looked up from the
    ``svcaller`` algorithm setting of the first item; without a usable caller
    the inputs are passed through. Callers listed in ``_NEEDS_BACKGROUND``
    additionally receive the samples from ``all_items`` whose names are not in
    ``items``, unless this is a paired analysis.

    Returns a list of single-element lists. For CWL runs each output is also
    validated, gets an ``svvalidate`` summary, has its ``sv`` value unnested
    (or set to an empty dict) and is passed through ``_add_supplemental``.
    """
    items = [utils.to_single_data(item) for item in items]
    items = cwlutils.unpack_tarballs(items, items[0])
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage, special_cases=True).get(svcaller)
    if not (svcaller and caller_fn):
        sv_results = items
    elif (all_items and svcaller in _NEEDS_BACKGROUND and
            not vcfutils.is_paired_analysis([d.get("align_bam") for d in items], items)):
        in_batch = {dd.get_sample_name(d) for d in items}
        background = [d for d in all_items if dd.get_sample_name(d) not in in_batch]
        sv_results = caller_fn(items, background)
    else:
        sv_results = caller_fn(items)
    out = [[entry] for entry in sv_results]
    if not cwlutils.is_cwl_run(items[0]):
        return out
    # Avoid nesting of callers for CWL runs for easier extraction
    flattened = [utils.to_single_data(wrapped) for wrapped in out]
    out_cwl = []
    for data in flattened:
        # Run validation directly from CWL runs since we're single stage
        data = validate.evaluate(data)
        data["svvalidate"] = {"summary": tz.get_in(["sv-validate", "csv"], data)}
        svs = data.get("sv")
        if not svs:
            data["sv"] = {}
        else:
            assert len(svs) == 1, svs
            data["sv"] = svs[0]
        data = _add_supplemental(data)
        out_cwl.append([data])
    return out_cwl