Exemple #1
0
def _combine_multiple_svcallers(samples):
    """
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        try:
            by_bam[x[0]["align_bam"]].append(x[0])
        except KeyError:
            by_bam[x[0]["align_bam"]] = [x[0]]
    highdepths = filter(
        lambda x: x is not None,
        list(
            set([
                tz.get_in(["config", "algorithm", "highdepth_regions"], x[0])
                for x in samples
            ])))
    out = []
    for grouped_calls in by_bam.values():

        def orig_svcaller_order(x):
            return _get_svcallers(x).index(
                x["config"]["algorithm"]["svcaller_active"])

        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
        final = grouped_calls[0]
        final_calls = ensemble.summarize(final_calls, final, highdepths)
        final_calls = validate.evaluate(final, final_calls)
        final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target for examining structural variation.
    items = sample-sv_caller list, from one batch
    """
    items = [utils.to_single_data(x) for x in items]
    items = cwlutils.unpack_tarballs(items, items[0])
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage, special_cases=True).get(svcaller)
    out = []
    batch = dd.get_batch(items[0])
    # no SV calling when just creating a PON for PureCN
    if batch == "pon_build" and "purecn" in dd.get_svcaller(items[0]):
        return out
    if svcaller and caller_fn:
        if (all_items and svcaller in _NEEDS_BACKGROUND
                and not vcfutils.is_paired_analysis(
                    [x.get("align_bam") for x in items], items)):
            names = set([dd.get_sample_name(x) for x in items])
            background = [
                x for x in all_items if dd.get_sample_name(x) not in names
            ]
            for svdata in caller_fn(items, background):
                out.append([svdata])
        else:
            for svdata in caller_fn(items):
                out.append([svdata])
    else:
        for data in items:
            out.append([data])
    # Avoid nesting of callers for CWL runs for easier extraction
    if cwlutils.is_cwl_run(items[0]):
        out_cwl = []
        for data in [utils.to_single_data(x) for x in out]:
            # Run validation directly from CWL runs since we're single stage
            data = validate.evaluate(data)
            data["svvalidate"] = {
                "summary": tz.get_in(["sv-validate", "csv"], data)
            }
            svs = data.get("sv")
            if svs:
                assert len(svs) == 1, svs
                data["sv"] = svs[0]
            else:
                data["sv"] = {}
            data = _add_supplemental(data)
            out_cwl.append([data])
        return out_cwl
    return out
def finalize_sv(samples, config, initial_only=False):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        try:
            by_bam[x["align_bam"]].append(x)
        except KeyError:
            by_bam[x["align_bam"]] = [x]
    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():

        def orig_svcaller_order(x):
            return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x], key=orig_svcaller_order)
        final = grouped_calls[0]
        if len(sorted_svcalls) > 0:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            if not initial_only:
                for caller in (c for c in _get_svcallers(final) if c in _ENSEMBLE_CALLERS):
                    final_calls = _ENSEMBLE_CALLERS[caller](final_calls, final)
                final_calls = ensemble.summarize(final_calls, final, grouped_calls)
                final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch in batches:
            try:
                by_batch[batch].append(final)
            except KeyError:
                by_batch[batch] = [final]
    out = []
    for batch, items in by_batch.items():
        if any("svplots" in dd.get_tools_on(d) for d in items):
            plot_items = plot.by_regions(items)
        else:
            plot_items = items
        for data in plot_items:
            if lead_batches[dd.get_sample_name(data)] == batch:
                out.append([data])
    return out
Exemple #4
0
def finalize_sv(samples, config, initial_only=False):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        try:
            by_bam[x["align_bam"]].append(x)
        except KeyError:
            by_bam[x["align_bam"]] = [x]
    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():
        def orig_svcaller_order(x):
            return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        if len(sorted_svcalls) > 0:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            if not initial_only:
                for caller in (c for c in _get_svcallers(final) if c in _ENSEMBLE_CALLERS):
                    final_calls = _ENSEMBLE_CALLERS[caller](final_calls, final)
                final_calls = ensemble.summarize(final_calls, final, grouped_calls)
                final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch in batches:
            try:
                by_batch[batch].append(final)
            except KeyError:
                by_batch[batch] = [final]
    out = []
    for batch, items in by_batch.items():
        if any("svplots" in dd.get_tools_on(d) for d in items):
            plot_items = plot.by_regions(items)
        else:
            plot_items = items
        for data in plot_items:
            if lead_batches[dd.get_sample_name(data)] == batch:
                out.append([data])
    return out
Exemple #5
0
def finalize_sv(samples, config):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        try:
            by_bam[x["align_bam"]].append(x)
        except KeyError:
            by_bam[x["align_bam"]] = [x]
    highdepths = filter(
        lambda x: x is not None,
        list(set([tz.get_in(["config", "algorithm", "highdepth_regions"], x) for x in samples])),
    )
    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():

        def orig_svcaller_order(x):
            return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x], key=orig_svcaller_order)
        final = grouped_calls[0]
        if len(sorted_svcalls) > 0:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch in batches:
            try:
                by_batch[batch].append(final)
            except KeyError:
                by_batch[batch] = [final]
    out = []
    for batch, items in by_batch.items():
        plot_items = plot.by_regions(items)
        for data in plot_items:
            if lead_batches[dd.get_sample_name(data)] == batch:
                out.append([data])
    return out
Exemple #6
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target for examining structural variation.
    """
    items = [utils.to_single_data(x) for x in items]
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage, special_cases=True).get(svcaller)
    out = []
    if svcaller and caller_fn:
        if (all_items and svcaller in _NEEDS_BACKGROUND
                and not vcfutils.is_paired_analysis(
                    [x.get("align_bam") for x in items], items)):
            names = set([dd.get_sample_name(x) for x in items])
            background = [
                x for x in all_items if dd.get_sample_name(x) not in names
            ]
            for svdata in caller_fn(items, background):
                out.append([svdata])
        else:
            for svdata in caller_fn(items):
                out.append([svdata])
    else:
        for data in items:
            out.append([data])
    # Avoid nesting of callers for CWL runs for easier extraction
    if "cwl_keys" in items[0]:
        out_cwl = []
        for data in [utils.to_single_data(x) for x in out]:
            # Run validation directly from CWL runs since we're single stage
            data = validate.evaluate(data)
            data["svvalidate"] = {
                "summary": tz.get_in(["sv-validate", "csv"], data)
            }
            svs = data.get("sv")
            if svs:
                assert len(svs) == 1, svs
                data["sv"] = svs[0]
            out_cwl.append([data])
        return out_cwl
    return out
Exemple #7
0
def detect_sv(items, all_items=None, stage="standard"):
    """Top level parallel target for examining structural variation.
    """
    items = [utils.to_single_data(x) for x in items]
    items = cwlutils.unpack_tarballs(items, items[0])
    svcaller = items[0]["config"]["algorithm"].get("svcaller")
    caller_fn = _get_callers(items, stage, special_cases=True).get(svcaller)
    out = []
    if svcaller and caller_fn:
        if (all_items and svcaller in _NEEDS_BACKGROUND and
                not vcfutils.is_paired_analysis([x.get("align_bam") for x in items], items)):
            names = set([dd.get_sample_name(x) for x in items])
            background = [x for x in all_items if dd.get_sample_name(x) not in names]
            for svdata in caller_fn(items, background):
                out.append([svdata])
        else:
            for svdata in caller_fn(items):
                out.append([svdata])
    else:
        for data in items:
            out.append([data])
    # Avoid nesting of callers for CWL runs for easier extraction
    if cwlutils.is_cwl_run(items[0]):
        out_cwl = []
        for data in [utils.to_single_data(x) for x in out]:
            # Run validation directly from CWL runs since we're single stage
            data = validate.evaluate(data)
            data["svvalidate"] = {"summary": tz.get_in(["sv-validate", "csv"], data)}
            svs = data.get("sv")
            if svs:
                assert len(svs) == 1, svs
                data["sv"] = svs[0]
            else:
                data["sv"] = {}
            data = _add_supplemental(data)
            out_cwl.append([data])
        return out_cwl
    return out
Exemple #8
0
def _combine_multiple_svcallers(samples):
    """
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        try:
            by_bam[x[0]["align_bam"]].append(x[0])
        except KeyError:
            by_bam[x[0]["align_bam"]] = [x[0]]
    out = []
    for grouped_calls in by_bam.values():
        def orig_svcaller_order(x):
            return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
        final = grouped_calls[0]
        final_calls = ensemble.summarize(final_calls, final)
        final_calls = validate.evaluate(final, final_calls)
        final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
Exemple #9
0
def validate_sv(data):
    """Validate structural variant calls for a sample.
    """
    return [[validate.evaluate(data)]]
Exemple #10
0
def validate_sv(data):
    """Validate structural variant calls for a sample.
    """
    return [[validate.evaluate(data)]]