def eval_cluster_f(self, gold_cluster, test_cluster):
    """Score a single (gold, test) cluster pair.

    The overlap is read from ``self.by_test[test_cluster][gold_cluster]``,
    the test cluster's size is the sum of that ``self.by_test`` row, and the
    gold cluster's size comes from ``self.gold_sizes``. With those counts,
    prec = |overlap| / |test| and rec = |overlap| / |gold|.

    Returns a pair of tuples: ``(prec, rec, f)`` followed by
    ``(|overlap|, |gold|, |test|)``.
    """
    test_row = self.by_test[test_cluster]
    overlap = test_row[gold_cluster]
    test_size = sum(test_row.values())
    gold_size = self.gold_sizes[gold_cluster]

    # Argument order is (matched, true, proposed).
    prec, rec, f_score = precision_recall_f(overlap, gold_size, test_size)

    scores = (prec, rec, f_score)
    counts = (overlap, gold_size, test_size)
    return scores, counts
def macro_average_f(self):
    """Return the macro-averaged f-score over all gold clusters.

    Every gold cluster is paired with the test cluster it overlaps most
    (on ties, the earliest one in ``self.all_test`` iteration order). The
    overlap, gold-size and test-size counts of those pairs are summed and
    one f-score is taken from the totals at the end. Pairing by largest
    overlap does not necessarily maximize the metric itself.
    """
    best_counts = []
    for gold_cluster in self.all_gold:
        # One (|overlap|, |gold|, |test|) triple per candidate test cluster.
        candidates = [
            self.eval_cluster_f(gold_cluster, test_cluster)[1]
            for test_cluster in self.all_test
        ]
        # Keep the triple with the largest overlap (element 0).
        best_counts.append(max(candidates, key=lambda counts: counts[0]))

    total_overlap = sum(counts[0] for counts in best_counts)
    total_gold = sum(counts[1] for counts in best_counts)
    total_test = sum(counts[2] for counts in best_counts)

    # Only the f-score is reported; precision and recall are discarded.
    _prec, _rec, f_score = precision_recall_f(
        total_overlap, total_gold, total_test
    )
    return f_score
def prec_rec(self):
    """Return classification precision, recall and F-score over edges.

    Scores are computed with respect to the same-cluster class, from the
    four counts unpacked from ``self.pairwise_statistics`` in the order
    (N00, N11, N01, N10). The attribute is read directly, not called.
    """
    _n00, n11, n01, n10 = self.pairwise_statistics

    # NOTE(review): roles assumed from the (matched, true, proposed)
    # argument order used for precision_recall_f elsewhere in this file --
    # confirm against its definition.
    true_edges = n11 + n10
    proposed_edges = n11 + n01
    return precision_recall_f(n11, true_edges, proposed_edges)