def average_counts(pe):
    """Average each count metric across all tasks.

    Args:
        pe: dict of dicts keyed by task, then by metric name, holding
            numeric scores. The metric names of the *first* task are
            taken as the reference set — assumes every task shares the
            same metric keys.

    Returns:
        dict mapping each metric name to the mean of that metric over
        all tasks. Empty dict when `pe` is empty (the previous version
        raised IndexError on `tasks[0]` in that case).
    """
    if not pe:
        return {}
    tasks = list(pe)
    metrics = list(pe[tasks[0]])
    return {metric: mean([pe[task][metric] for task in tasks])
            for metric in metrics}
def average_profile(pe):
    """Average the per-task profile 'auprc' score for every binsize.

    Args:
        pe: dict keyed by task; each value is a dict keyed by binsize
            whose entries contain (at least) an 'auprc' score. Binsizes
            are taken from the first task's entry.

    Returns:
        dict mapping each binsize to ``{"auprc": <mean across tasks>}``.
    """
    task_names = list(pe)
    reference = pe[task_names[0]]
    averaged = {}
    for bs in reference:
        scores = [pe[name][bs]['auprc'] for name in task_names]
        averaged[bs] = {"auprc": mean(scores)}
    return averaged
def evaluate(self, dataset, eval_metric=None, num_workers=8, batch_size=256):
    """Evaluate the model on `dataset`.

    Streams the dataset batch by batch, keeps only the predictions whose
    keys appear in the batch targets, concatenates all batches, and then
    either applies `eval_metric(labels, preds)` (when given) or computes
    each head's own metric per task plus a cross-task average for every
    flattened metric key.

    Args:
        dataset: object exposing `batch_train_iter(...)` and `__len__`
            (presumably a kipoi-style dataloader — confirm against caller).
        eval_metric: optional callable `(labels, preds) -> result`; when
            provided it replaces the per-head metric computation entirely.
        num_workers: data-loading workers forwarded to `batch_train_iter`.
        batch_size: batch size for loading; also used to estimate the
            progress-bar total (`len(dataset) // batch_size`).

    Returns:
        Whatever `eval_metric` returns, or a flat dict
        `{<target-or-avg key>/<metric path>: value}`.
    """
    lpreds = []
    llabels = []
    for inputs, targets in tqdm(dataset.batch_train_iter(cycle=False,
                                                         num_workers=num_workers,
                                                         batch_size=batch_size),
                                total=len(dataset) // batch_size):
        assert isinstance(targets, dict)
        target_keys = list(targets)
        # deepcopy so the stored labels don't alias buffers the loader may reuse
        llabels.append(deepcopy(targets))
        bpreds = {k: v for k, v in self.predict(inputs, batch_size=None).items()
                  if k in target_keys}  # keep only the target key predictions
        lpreds.append(bpreds)
        # free the batch tensors eagerly to keep peak memory down
        del inputs
        del targets
    # presumably concatenates the list of per-batch dicts along axis 0 — verify
    preds = numpy_collate_concat(lpreds)
    labels = numpy_collate_concat(llabels)
    del lpreds
    del llabels
    if eval_metric is not None:
        return eval_metric(labels, preds)
    else:
        # collects, for every flattened metric key, the per-task values
        # so they can be averaged into an "avg" pseudo-task below
        task_avg_tape = defaultdict(list)
        out = {}
        for task, heads in self.all_heads.items():
            for head_i, head in enumerate(heads):
                target_name = head.get_target(task)
                if target_name not in labels:
                    print(f"Target {target_name} not found. Skipping evaluation")
                    continue
                res = head.metric(labels[target_name], preds[target_name])
                out[target_name] = res
                metrics_dict = flatten(res, separator='/')
                for k, v in metrics_dict.items():
                    # head.target_name is presumably a "{task}/..." template;
                    # substituting "avg" yields the averaged-metric key — confirm
                    task_avg_tape[head.target_name.replace("{task}", "avg")
                                  + "/" + k].append(v)
        for k, v in task_avg_tape.items():
            # get the average
            out[k] = mean(v)

        # flatten everything
        out = flatten(out, separator='/')
        return out
def get_hyp_contrib(self, contrib_score=None, idx=None):
    """Return the hypothetical contribution scores for every task.

    Args:
        contrib_score: which contribution score to read; falls back to
            `self.default_contrib_score` when None.
        idx: optional subset selector forwarded to `self._subset`; full
            (un-subset) results are memoized per contrib_score.

    Returns:
        dict mapping each task to its (possibly subset) averaged
        hypothetical contribution scores.
    """
    if contrib_score is None:
        contrib_score = self.default_contrib_score
    # Serve the memoized full result; subsets are never cached.
    if idx is None and contrib_score in self._hyp_contrib_cache:
        return self._hyp_contrib_cache[contrib_score]
    result = {}
    for task in self.get_tasks():
        # NOTE: averaging collapses any additional axes after
        # {contrib_score}, e.g. strands stored under
        # /hyp_contrib/{task}/{contrib_score}/{strand}, where strand = 0 or 1
        subkeys = self._data_subkeys(f'/hyp_contrib/{task}/{contrib_score}')
        result[task] = mean([self._subset(self.data[k], idx) for k in subkeys])
    if idx is None:
        self._hyp_contrib_cache[contrib_score] = result
    return result