Beispiel #1
0
def combine_multiple_callers(samples):
    """Collapse together variant calls from multiple approaches into single data item with `variants`.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = tz.get_in(("combine", "work_bam", "out"), data, data.get("align_bam"))
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        variantcaller = get_variantcaller(data)
        key = (multi.get_batch_for_key(data), work_bam)
        if key not in by_bam:
            by_bam[key] = []
        by_bam[key].append((variantcaller, jointcaller, data))
    out = []
    for callgroup in by_bam.values():
        ready_calls = []
        for variantcaller, jointcaller, data in callgroup:
            if variantcaller:
                cur = data.get("vrn_file_plus", {})
                cur.update({"variantcaller": variantcaller,
                            "vrn_file": data.get("vrn_file_orig") if jointcaller else data.get("vrn_file"),
                            "vrn_file_batch": data.get("vrn_file_batch") if not jointcaller else None,
                            "vrn_stats": data.get("vrn_stats"),
                            "validate": data.get("validate") if not jointcaller else None})
                if jointcaller:
                    cur["population"] = False
                ready_calls.append(cur)
            if jointcaller:
                cur = {"variantcaller": jointcaller,
                       "vrn_file": data.get("vrn_file"),
                       "vrn_file_batch": data.get("vrn_file_batch"),
                       "validate": data.get("validate"),
                       "do_upload": False}
                if not variantcaller:
                    cur["population"] = {"vcf": data.get("vrn_file")}
                ready_calls.append(cur)
            if not jointcaller and not variantcaller:
                ready_calls.append({"variantcaller": "precalled",
                                    "vrn_file": data.get("vrn_file"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
        final = callgroup[0][-1]
        def orig_variantcaller_order(x):
            try:
                return final["config"]["algorithm"]["orig_variantcaller"].index(x["variantcaller"])
            except ValueError:
                return final["config"]["algorithm"]["orig_jointcaller"].index(x["variantcaller"])
        if len(ready_calls) > 1 and "orig_variantcaller" in final["config"]["algorithm"]:
            final["variants"] = sorted(ready_calls, key=orig_variantcaller_order)
            final["config"]["algorithm"]["variantcaller"] = final["config"]["algorithm"].pop("orig_variantcaller")
            if "orig_jointcaller" in final["config"]["algorithm"]:
                final["config"]["algorithm"]["jointcaller"] = final["config"]["algorithm"].pop("orig_jointcaller")
        else:
            final["variants"] = ready_calls
        final.pop("vrn_file_batch", None)
        final.pop("vrn_file_orig", None)
        final.pop("vrn_file_plus", None)
        final.pop("vrn_stats", None)
        out.append([final])
    return out
Beispiel #2
0
def combine_multiple_callers(samples):
    """Collapse together variant calls from multiple approaches into single data item with `variants`.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = tz.get_in(("combine", "work_bam", "out"), data, data.get("align_bam"))
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        variantcaller = get_variantcaller(data)
        key = (multi.get_batch_for_key(data), work_bam)
        if key not in by_bam:
            by_bam[key] = []
        by_bam[key].append((variantcaller, jointcaller, data))
    out = []
    for callgroup in by_bam.values():
        ready_calls = []
        for variantcaller, jointcaller, data in callgroup:
            if variantcaller:
                cur = data.get("vrn_file_plus", {})
                cur.update({"variantcaller": variantcaller,
                            "vrn_file": data.get("vrn_file_orig") if jointcaller else data.get("vrn_file"),
                            "vrn_file_batch": data.get("vrn_file_batch") if not jointcaller else None,
                            "vrn_stats": data.get("vrn_stats"),
                            "validate": data.get("validate") if not jointcaller else None})
                if jointcaller:
                    cur["population"] = False
                ready_calls.append(cur)
            if jointcaller:
                ready_calls.append({"variantcaller": jointcaller,
                                    "vrn_file": data.get("vrn_file"),
                                    "vrn_file_batch": data.get("vrn_file_batch"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
            if not jointcaller and not variantcaller:
                ready_calls.append({"variantcaller": "precalled",
                                    "vrn_file": data.get("vrn_file"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
        final = callgroup[0][-1]
        def orig_variantcaller_order(x):
            try:
                return final["config"]["algorithm"]["orig_variantcaller"].index(x["variantcaller"])
            except ValueError:
                return final["config"]["algorithm"]["orig_jointcaller"].index(x["variantcaller"])
        if len(ready_calls) > 1 and "orig_variantcaller" in final["config"]["algorithm"]:
            final["variants"] = sorted(ready_calls, key=orig_variantcaller_order)
            final["config"]["algorithm"]["variantcaller"] = final["config"]["algorithm"].pop("orig_variantcaller")
            if "orig_jointcaller" in final["config"]["algorithm"]:
                final["config"]["algorithm"]["jointcaller"] = final["config"]["algorithm"].pop("orig_jointcaller")
        else:
            final["variants"] = ready_calls
        final.pop("vrn_file_batch", None)
        final.pop("vrn_file_orig", None)
        final.pop("vrn_file_plus", None)
        final.pop("vrn_stats", None)
        out.append([final])
    return out
Beispiel #3
0
def _group_by_batches(samples, check_fn):
    """Group calls by batches, processing families together during ensemble calling.
    """
    batch_groups = collections.defaultdict(list)
    extras = []
    for data in [x[0] for x in samples]:
        if check_fn(data):
            batch_groups[multi.get_batch_for_key(data)].append(data)
        else:
            extras.append([data])
    return batch_groups, extras
Beispiel #4
0
def _group_by_batches(samples, check_fn):
    """Group calls by batches, processing families together during ensemble calling.
    """
    batch_groups = collections.defaultdict(list)
    extras = []
    for data in [x[0] for x in samples]:
        if check_fn(data):
            batch_groups[multi.get_batch_for_key(data)].append(data)
        else:
            extras.append([data])
    return batch_groups, extras
Beispiel #5
0
def _collapse_by_bam_variantcaller(samples):
    """Collapse regions to a single representative by BAM input, variant caller and batch.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = utils.get_in(data, ("combine", "work_bam", "out"), data.get("align_bam"))
        variantcaller = get_variantcaller(data)
        if isinstance(work_bam, list):
            work_bam = tuple(work_bam)
        key = (multi.get_batch_for_key(data), work_bam, variantcaller)
        try:
            by_bam[key].append(data)
        except KeyError:
            by_bam[key] = [data]
    out = []
    for grouped_data in by_bam.values():
        cur = grouped_data[0]
        cur.pop("region", None)
        region_bams = cur.pop("region_bams", None)
        if region_bams and len(region_bams[0]) > 1:
            cur.pop("work_bam", None)
        out.append([cur])
    return out
Beispiel #6
0
def _collapse_by_bam_variantcaller(samples):
    """Collapse regions to a single representative by BAM input, variant caller and batch.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = utils.get_in(data, ("combine", "work_bam", "out"), data.get("align_bam"))
        variantcaller = get_variantcaller(data)
        if isinstance(work_bam, list):
            work_bam = tuple(work_bam)
        key = (multi.get_batch_for_key(data), work_bam, variantcaller)
        try:
            by_bam[key].append(data)
        except KeyError:
            by_bam[key] = [data]
    out = []
    for grouped_data in by_bam.values():
        cur = grouped_data[0]
        cur.pop("region", None)
        region_bams = cur.pop("region_bams", None)
        if region_bams and len(region_bams[0]) > 1:
            cur.pop("work_bam", None)
        out.append([cur])
    return out