def combine_multiple_callers(samples):
    """Collapse together variant calls from multiple approaches into single data item with `variants`.

    Inputs are grouped by ``(batch, BAM)``, where the BAM is read from
    ``data["combine"]["work_bam"]["out"]`` and falls back to
    ``data["align_bam"]``. Each group produces one output item: the first
    data dictionary of the group, modified in place so that

    - ``variants`` holds one dictionary per call made for the group, and
    - the per-call keys ``vrn_file_batch``, ``vrn_file_orig``,
      ``vrn_file_plus`` and ``vrn_stats`` are removed from the top level.

    Args:
        samples: iterable of sequences whose first element is a sample
            data dictionary.

    Returns:
        A list of single-element lists, one per ``(batch, BAM)`` group, in
        first-seen order.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = tz.get_in(("combine", "work_bam", "out"), data, data.get("align_bam"))
        # A list is unhashable and cannot take part in a dictionary key;
        # normalize it the same way _collapse_by_bam_variantcaller does.
        if isinstance(work_bam, list):
            work_bam = tuple(work_bam)
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        variantcaller = get_variantcaller(data)
        key = (multi.get_batch_for_key(data), work_bam)
        if key not in by_bam:
            by_bam[key] = []
        by_bam[key].append((variantcaller, jointcaller, data))
    out = []
    for callgroup in by_bam.values():
        ready_calls = []
        for variantcaller, jointcaller, data in callgroup:
            if variantcaller:
                # Extends the existing `vrn_file_plus` dictionary in place
                # when the data item already carries one.
                cur = data.get("vrn_file_plus", {})
                cur.update({"variantcaller": variantcaller,
                            "vrn_file": data.get("vrn_file_orig") if jointcaller else data.get("vrn_file"),
                            "vrn_file_batch": data.get("vrn_file_batch") if not jointcaller else None,
                            "vrn_stats": data.get("vrn_stats"),
                            "validate": data.get("validate") if not jointcaller else None})
                if jointcaller:
                    cur["population"] = False
                ready_calls.append(cur)
            if jointcaller:
                # The joint call is reported as its own entry, named after
                # the joint caller and never uploaded.
                cur = {"variantcaller": jointcaller,
                       "vrn_file": data.get("vrn_file"),
                       "vrn_file_batch": data.get("vrn_file_batch"),
                       "validate": data.get("validate"),
                       "do_upload": False}
                if not variantcaller:
                    cur["population"] = {"vcf": data.get("vrn_file")}
                ready_calls.append(cur)
            if not jointcaller and not variantcaller:
                ready_calls.append({"variantcaller": "precalled",
                                    "vrn_file": data.get("vrn_file"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
        # The first data item of the group carries the combined result.
        final = callgroup[0][-1]

        def orig_variantcaller_order(x):
            """Position of a call in the original caller configuration."""
            try:
                return final["config"]["algorithm"]["orig_variantcaller"].index(x["variantcaller"])
            except ValueError:
                return final["config"]["algorithm"]["orig_jointcaller"].index(x["variantcaller"])

        if len(ready_calls) > 1 and "orig_variantcaller" in final["config"]["algorithm"]:
            final["variants"] = sorted(ready_calls, key=orig_variantcaller_order)
            # Restore the originally configured callers on the combined item.
            final["config"]["algorithm"]["variantcaller"] = final["config"]["algorithm"].pop("orig_variantcaller")
            if "orig_jointcaller" in final["config"]["algorithm"]:
                final["config"]["algorithm"]["jointcaller"] = final["config"]["algorithm"].pop("orig_jointcaller")
        else:
            final["variants"] = ready_calls
        final.pop("vrn_file_batch", None)
        final.pop("vrn_file_orig", None)
        final.pop("vrn_file_plus", None)
        final.pop("vrn_stats", None)
        out.append([final])
    return out
def combine_multiple_callers(samples):
    """Collapse together variant calls from multiple approaches into single data item with `variants`.

    Inputs are grouped by ``(batch, BAM)``, where the BAM is read from
    ``data["combine"]["work_bam"]["out"]`` and falls back to
    ``data["align_bam"]``. Each group produces one output item: the first
    data dictionary of the group, modified in place so that

    - ``variants`` holds one dictionary per call made for the group, and
    - the per-call keys ``vrn_file_batch``, ``vrn_file_orig``,
      ``vrn_file_plus`` and ``vrn_stats`` are removed from the top level.

    Args:
        samples: iterable of sequences whose first element is a sample
            data dictionary.

    Returns:
        A list of single-element lists, one per ``(batch, BAM)`` group, in
        first-seen order.
    """
    by_bam = collections.OrderedDict()
    for data in (x[0] for x in samples):
        work_bam = tz.get_in(("combine", "work_bam", "out"), data, data.get("align_bam"))
        # A list is unhashable and cannot take part in a dictionary key;
        # normalize it the same way _collapse_by_bam_variantcaller does.
        if isinstance(work_bam, list):
            work_bam = tuple(work_bam)
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        variantcaller = get_variantcaller(data)
        key = (multi.get_batch_for_key(data), work_bam)
        if key not in by_bam:
            by_bam[key] = []
        by_bam[key].append((variantcaller, jointcaller, data))
    out = []
    for callgroup in by_bam.values():
        ready_calls = []
        for variantcaller, jointcaller, data in callgroup:
            if variantcaller:
                # Extends the existing `vrn_file_plus` dictionary in place
                # when the data item already carries one.
                cur = data.get("vrn_file_plus", {})
                cur.update({"variantcaller": variantcaller,
                            "vrn_file": data.get("vrn_file_orig") if jointcaller else data.get("vrn_file"),
                            "vrn_file_batch": data.get("vrn_file_batch") if not jointcaller else None,
                            "vrn_stats": data.get("vrn_stats"),
                            "validate": data.get("validate") if not jointcaller else None})
                if jointcaller:
                    cur["population"] = False
                ready_calls.append(cur)
            if jointcaller:
                # The joint call is reported as its own entry, named after
                # the joint caller and never uploaded.
                ready_calls.append({"variantcaller": jointcaller,
                                    "vrn_file": data.get("vrn_file"),
                                    "vrn_file_batch": data.get("vrn_file_batch"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
            if not jointcaller and not variantcaller:
                ready_calls.append({"variantcaller": "precalled",
                                    "vrn_file": data.get("vrn_file"),
                                    "validate": data.get("validate"),
                                    "do_upload": False})
        # The first data item of the group carries the combined result.
        final = callgroup[0][-1]

        def orig_variantcaller_order(x):
            """Position of a call in the original caller configuration."""
            try:
                return final["config"]["algorithm"]["orig_variantcaller"].index(x["variantcaller"])
            except ValueError:
                return final["config"]["algorithm"]["orig_jointcaller"].index(x["variantcaller"])

        if len(ready_calls) > 1 and "orig_variantcaller" in final["config"]["algorithm"]:
            final["variants"] = sorted(ready_calls, key=orig_variantcaller_order)
            # Restore the originally configured callers on the combined item.
            final["config"]["algorithm"]["variantcaller"] = final["config"]["algorithm"].pop("orig_variantcaller")
            if "orig_jointcaller" in final["config"]["algorithm"]:
                final["config"]["algorithm"]["jointcaller"] = final["config"]["algorithm"].pop("orig_jointcaller")
        else:
            final["variants"] = ready_calls
        final.pop("vrn_file_batch", None)
        final.pop("vrn_file_orig", None)
        final.pop("vrn_file_plus", None)
        final.pop("vrn_stats", None)
        out.append([final])
    return out
def _group_by_batches(samples, check_fn):
    """Split samples into per-batch groups and ungrouped extras.

    The first element of every entry in `samples` is passed to `check_fn`.
    Items that pass are collected under their batch key; all others are
    returned individually wrapped in a one-element list.

    Returns a tuple of (defaultdict of batch key -> list of items,
    list of single-item lists).
    """
    batch_groups = collections.defaultdict(list)
    extras = []
    items = [sample[0] for sample in samples]
    for item in items:
        if not check_fn(item):
            extras.append([item])
            continue
        batch_groups[multi.get_batch_for_key(item)].append(item)
    return batch_groups, extras
def _group_by_batches(samples, check_fn):
    """Partition sample data into batch groups plus leftover items.

    Every entry of `samples` contributes its first element. When
    `check_fn` accepts that element it is appended to the list for its
    batch key, otherwise it is kept on its own as a one-element list.

    Returns a tuple of (defaultdict of batch key -> list of items,
    list of single-item lists).
    """
    grouped = collections.defaultdict(list)
    ungrouped = []
    for data in [entry[0] for entry in samples]:
        if check_fn(data):
            grouped[multi.get_batch_for_key(data)].append(data)
            continue
        ungrouped.append([data])
    return grouped, ungrouped
def _collapse_by_bam_variantcaller(samples):
    """Reduce region-level items to one representative per batch, BAM and caller.

    Items sharing the same (batch, BAM, variant caller) key are grouped and
    only the first item seen for each key is kept. The kept item is
    modified in place: `region` and `region_bams` are removed, and
    `work_bam` is removed as well when the first `region_bams` entry holds
    more than one element.

    Returns a list of single-item lists in first-seen key order.
    """
    grouped = collections.OrderedDict()
    for sample in samples:
        data = sample[0]
        bam = utils.get_in(data, ("combine", "work_bam", "out"), data.get("align_bam"))
        caller = get_variantcaller(data)
        # Lists cannot be hashed, so use a tuple inside the grouping key.
        if isinstance(bam, list):
            bam = tuple(bam)
        group_key = (multi.get_batch_for_key(data), bam, caller)
        grouped.setdefault(group_key, []).append(data)
    collapsed = []
    for members in grouped.values():
        representative = members[0]
        representative.pop("region", None)
        region_bams = representative.pop("region_bams", None)
        if region_bams and len(region_bams[0]) > 1:
            representative.pop("work_bam", None)
        collapsed.append([representative])
    return collapsed
def _collapse_by_bam_variantcaller(samples):
    """Pick a single item for each batch / BAM / variant caller combination.

    The first item encountered for a combination stands in for the whole
    group. Before being returned it loses its `region` and `region_bams`
    keys, and also its `work_bam` key when the first entry of
    `region_bams` contains more than one element.

    Returns a list of one-element lists, ordered by first appearance of
    each combination.
    """
    buckets = collections.OrderedDict()
    for data in (entry[0] for entry in samples):
        work_bam = utils.get_in(data, ("combine", "work_bam", "out"), data.get("align_bam"))
        variantcaller = get_variantcaller(data)
        # A list BAM value is converted so the key stays hashable.
        hashable_bam = tuple(work_bam) if isinstance(work_bam, list) else work_bam
        bucket_key = (multi.get_batch_for_key(data), hashable_bam, variantcaller)
        if bucket_key in buckets:
            buckets[bucket_key].append(data)
        else:
            buckets[bucket_key] = [data]
    result = []
    for bucket in buckets.values():
        first = bucket[0]
        first.pop("region", None)
        region_bams = first.pop("region_bams", None)
        has_multiple_region_bams = bool(region_bams) and len(region_bams[0]) > 1
        if has_multiple_region_bams:
            first.pop("work_bam", None)
        result.append([first])
    return result