def _combine_multiple_svcallers(samples):
    """Merge per-caller structural variant results into one item per BAM.

    ``samples`` is an iterable of single-element lists; the wrapped item is a
    dictionary that provides ``align_bam`` and
    ``config -> algorithm -> svcaller_active``. Items sharing an ``align_bam``
    are grouped, the ``sv`` values of the group are concatenated in the order
    ``_get_svcallers`` reports for the active caller, and the result is passed
    through ``ensemble.summarize`` and ``validate.evaluate``.

    Returns a list of single-element lists, one per distinct ``align_bam`` in
    first-seen order. The first item of every group is modified in place: it
    receives the combined ``sv`` value (only when some item in the group has
    an ``sv`` key) and its ``svcaller_active`` entry is removed.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x[0]["align_bam"], []).append(x[0])

    # Build a real list: on Python 3 ``filter`` returns a one-shot iterator,
    # which would be exhausted after the first group in the loop below.
    highdepths = [
        region
        for region in set(
            tz.get_in(["config", "algorithm", "highdepth_regions"], x[0])
            for x in samples
        )
        if region is not None
    ]

    def orig_svcaller_order(x):
        # Position of this item's active caller among its configured callers.
        return _get_svcallers(x).index(
            x["config"]["algorithm"]["svcaller_active"])

    out = []
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        # Without this guard ``reduce`` raises TypeError on an empty list when
        # no item in the group carries ``sv`` results.
        if sorted_svcalls:
            final_calls = reduce(operator.add,
                                 [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
def _combine_multiple_svcallers(samples):
    """Merge per-caller structural variant results into one item per BAM.

    ``samples`` is an iterable of single-element lists; the wrapped item is a
    dictionary that provides ``align_bam`` and
    ``config -> algorithm -> svcaller_active``. Items sharing an ``align_bam``
    are grouped, the ``sv`` values of the group are concatenated in the order
    ``_get_svcallers`` reports for the active caller, and the result is passed
    through ``ensemble.summarize``.

    Returns a list of single-element lists, one per distinct ``align_bam`` in
    first-seen order. The first item of every group is modified in place: it
    receives the combined ``sv`` value (only when some item in the group has
    an ``sv`` key) and its ``svcaller_active`` entry is removed.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x[0]["align_bam"], []).append(x[0])

    def orig_svcaller_order(x):
        # Position of this item's active caller among its configured callers.
        return _get_svcallers(x).index(
            x["config"]["algorithm"]["svcaller_active"])

    out = []
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        # Without this guard ``reduce`` raises TypeError on an empty list when
        # no item in the group carries ``sv`` results.
        if sorted_svcalls:
            final_calls = reduce(operator.add,
                                 [x["sv"] for x in sorted_svcalls])
            final["sv"] = ensemble.summarize(final_calls, final)
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out
def finalize_sv(samples, config, initial_only=False):
    """Collapse results from several SV callers into one ordered 'sv' key.

    Items in ``samples`` are grouped by ``align_bam``. Within a group, the
    ``sv`` values are concatenated following the caller order reported by
    ``_get_svcallers``. Unless ``initial_only`` is true, the combined calls
    are additionally passed through every matching entry of
    ``_ENSEMBLE_CALLERS``, then ``ensemble.summarize`` and
    ``validate.evaluate``. The first item of each group is updated in place
    and loses its ``svcaller_active`` entry.

    The finalized items are then bucketed by batch (falling back to the sample
    name), optionally run through ``plot.by_regions`` when any item in the
    batch has ``svplots`` among its enabled tools, and returned as a list of
    single-element lists. An item belonging to several batches is emitted only
    for the first batch it lists. ``config`` is not read by this function.
    """
    def caller_rank(item):
        # Position of this item's active caller among its configured callers.
        active = item["config"]["algorithm"]["svcaller_active"]
        return _get_svcallers(item).index(active)

    bam_groups = collections.OrderedDict()
    for sample in samples:
        bam_groups.setdefault(sample["align_bam"], []).append(sample)

    batch_groups = collections.OrderedDict()
    primary_batch = {}
    for group in bam_groups.values():
        with_calls = [item for item in group if "sv" in item]
        with_calls.sort(key=caller_rank)
        lead = group[0]
        if with_calls:
            merged = reduce(operator.add, [item["sv"] for item in with_calls])
            if not initial_only:
                for caller in _get_svcallers(lead):
                    if caller in _ENSEMBLE_CALLERS:
                        merged = _ENSEMBLE_CALLERS[caller](merged, lead)
                merged = ensemble.summarize(merged, lead, group)
                merged = validate.evaluate(lead, merged)
            lead["sv"] = merged
        del lead["config"]["algorithm"]["svcaller_active"]

        batch_names = dd.get_batch(lead) or dd.get_sample_name(lead)
        if not isinstance(batch_names, (list, tuple)):
            batch_names = [batch_names]
        # Remember the first batch so each sample is emitted only once below.
        primary_batch[dd.get_sample_name(lead)] = batch_names[0]
        for name in batch_names:
            batch_groups.setdefault(name, []).append(lead)

    finalized = []
    for batch_name, members in batch_groups.items():
        wants_plots = any("svplots" in dd.get_tools_on(member) for member in members)
        plotted = plot.by_regions(members) if wants_plots else members
        for item in plotted:
            if primary_batch[dd.get_sample_name(item)] == batch_name:
                finalized.append([item])
    return finalized
def finalize_sv(samples, config):
    """Combine results from multiple sv callers into a single ordered 'sv' key.

    Handles ensemble calling and plotting of results.

    Items in ``samples`` are grouped by ``align_bam``. Within a group, the
    ``sv`` values are concatenated following the caller order reported by
    ``_get_svcallers`` and then passed through ``ensemble.summarize`` (together
    with the distinct non-None ``highdepth_regions`` values found across all
    samples) and ``validate.evaluate``. The first item of each group is
    updated in place and loses its ``svcaller_active`` entry.

    The finalized items are bucketed by batch (falling back to the sample
    name), run through ``plot.by_regions`` and returned as a list of
    single-element lists. An item belonging to several batches is emitted only
    for the first batch it lists. ``config`` is not read by this function.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x["align_bam"], []).append(x)

    # Build a real list: on Python 3 ``filter`` returns a one-shot iterator,
    # which would be exhausted after the first group in the loop below.
    highdepths = [
        region
        for region in set(
            tz.get_in(["config", "algorithm", "highdepth_regions"], x)
            for x in samples
        )
        if region is not None
    ]

    def orig_svcaller_order(x):
        # Position of this item's active caller among its configured callers.
        return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

    by_batch = collections.OrderedDict()
    lead_batches = {}
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        if sorted_svcalls:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final, highdepths)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]

        batch = dd.get_batch(final) or dd.get_sample_name(final)
        batches = batch if isinstance(batch, (list, tuple)) else [batch]
        # Remember the first batch so each sample is emitted only once below.
        lead_batches[dd.get_sample_name(final)] = batches[0]
        for batch_name in batches:
            by_batch.setdefault(batch_name, []).append(final)

    out = []
    for batch_name, items in by_batch.items():
        for data in plot.by_regions(items):
            if lead_batches[dd.get_sample_name(data)] == batch_name:
                out.append([data])
    return out
def _combine_multiple_svcallers(samples):
    """Merge per-caller structural variant results into one item per BAM.

    ``samples`` is an iterable of single-element lists; the wrapped item is a
    dictionary that provides ``align_bam`` and
    ``config -> algorithm -> svcaller_active``. Items sharing an ``align_bam``
    are grouped, the ``sv`` values of the group are concatenated in the order
    ``_get_svcallers`` reports for the active caller, and the result is passed
    through ``ensemble.summarize`` and ``validate.evaluate``.

    Returns a list of single-element lists, one per distinct ``align_bam`` in
    first-seen order. The first item of every group is modified in place: it
    receives the combined ``sv`` value (only when some item in the group has
    an ``sv`` key) and its ``svcaller_active`` entry is removed.
    """
    by_bam = collections.OrderedDict()
    for x in samples:
        by_bam.setdefault(x[0]["align_bam"], []).append(x[0])

    def orig_svcaller_order(x):
        # Position of this item's active caller among its configured callers.
        return _get_svcallers(x).index(x["config"]["algorithm"]["svcaller_active"])

    out = []
    for grouped_calls in by_bam.values():
        sorted_svcalls = sorted([x for x in grouped_calls if "sv" in x],
                                key=orig_svcaller_order)
        final = grouped_calls[0]
        # Without this guard ``reduce`` raises TypeError on an empty list when
        # no item in the group carries ``sv`` results.
        if sorted_svcalls:
            final_calls = reduce(operator.add, [x["sv"] for x in sorted_svcalls])
            final_calls = ensemble.summarize(final_calls, final)
            final_calls = validate.evaluate(final, final_calls)
            final["sv"] = final_calls
        del final["config"]["algorithm"]["svcaller_active"]
        out.append([final])
    return out