def lf_empirical_probs(self, Y: np.ndarray, k: int) -> np.ndarray:
    """Estimate conditional probability tables for each LF.

    Computes conditional probability tables, P(L | Y), for each LF using the
    provided true labels Y.

    Parameters
    ----------
    Y
        The n-dim array of true labels in {0,...,k-1}
    k
        The cardinality, i.e. the number of classes

    Returns
    -------
    np.ndarray
        An m x (k+1) x k np.ndarray representing the m (k+1) x k conditional
        probability tables P_i, where P_i[l+1, y] is the empirical estimate of
        P(LF_i = l | Y = y), with row 0 holding the abstain label -1
    """
    n, m = self.L.shape
    Y = to_int_label_array(Y)

    # Compute empirical conditional probabilities
    # Note: Can do this more efficiently...
    P = np.zeros((m, k + 1, k))
    for y in range(k):
        is_y = np.where(Y == y, 1, 0)
        for j, l in product(range(m), range(-1, k)):
            P[j, l + 1, y] = np.where(self.L[:, j] == l, 1, 0) @ is_y / is_y.sum()
    return P
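
# A minimal usage sketch for lf_empirical_probs, assuming it is the
# snorkel.labeling.LFAnalysis method that wraps the label matrix self.L
# (labels in {0, ..., k-1}, -1 = abstain); shapes follow the docstring above.
import numpy as np
from snorkel.labeling import LFAnalysis

L = np.array([[0, 1], [0, -1], [1, 1], [-1, 0]])  # n=4 data points, m=2 LFs
Y = np.array([0, 0, 1, 1])                        # gold labels, k=2 classes

P = LFAnalysis(L).lf_empirical_probs(Y, k=2)
P.shape  # (2, 3, 2), i.e. m x (k+1) x k; P[j, l + 1, y] estimates P(LF_j = l | Y = y)
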
def get_label_buckets(*y: np.ndarray) -> Dict[Tuple[int, ...], np.ndarray]:
    """Return data point indices bucketed by label combinations.

    Parameters
    ----------
    *y
        One or more np.ndarrays of (int) labels

    Returns
    -------
    Dict[Tuple[int, ...], np.ndarray]
        A mapping of each label bucket to a NumPy array of its corresponding indices

    Example
    -------
    A common use case is calling ``buckets = get_label_buckets(Y_gold, Y_pred)``
    where ``Y_gold`` is a set of gold (i.e. ground truth) labels and ``Y_pred``
    is a corresponding set of predicted labels.

    >>> Y_gold = np.array([1, 1, 1, 0])
    >>> Y_pred = np.array([1, 1, -1, -1])
    >>> buckets = get_label_buckets(Y_gold, Y_pred)

    The returned ``buckets[(i, j)]`` is a NumPy array of data point indices with
    true label i and predicted label j. More generally, the returned indices
    within each bucket refer to the order of the labels that were passed in as
    function arguments.

    >>> buckets[(1, 1)]  # true positives
    array([0, 1])
    >>> (1, 0) in buckets  # false positives
    False
    >>> (0, 1) in buckets  # false negatives
    False
    >>> (0, 0) in buckets  # true negatives
    False
    >>> buckets[(1, -1)]  # abstained positives
    array([2])
    >>> buckets[(0, -1)]  # abstained negatives
    array([3])
    """
    buckets: DefaultDict[Tuple[int, int], List[int]] = defaultdict(list)
    y_flat = list(map(lambda x: to_int_label_array(x, flatten_vector=True), y))
    if len(set(map(len, y_flat))) != 1:
        raise ValueError("Arrays must all have the same number of elements")
    for i, labels in enumerate(zip(*y_flat)):
        buckets[labels].append(i)
    return {k: np.array(v) for k, v in buckets.items()}
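
# A typical follow-up to get_label_buckets is slicing the original data with a
# bucket's indices to inspect one error mode; the DataFrame df_dev below is
# hypothetical and only illustrates the pattern.
import numpy as np
import pandas as pd

df_dev = pd.DataFrame({"text": ["a", "b", "c", "d"]})
Y_gold = np.array([1, 1, 1, 0])
Y_pred = np.array([1, 1, -1, -1])

buckets = get_label_buckets(Y_gold, Y_pred)
# Examples with true label 1 where the model abstained (predicted -1)
abstained_positives = df_dev.iloc[buckets[(1, -1)]]
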
def test_to_int_label_array(self):
    X = np.array([[1], [0], [2.0]])
    Y_expected = np.array([1, 0, 2])
    Y = to_int_label_array(X, flatten_vector=True)
    np.testing.assert_array_equal(Y, Y_expected)

    Y = to_int_label_array(X, flatten_vector=False)
    Y_expected = np.array([[1], [0], [2]])
    np.testing.assert_array_equal(Y, Y_expected)

    X = np.array([[1], [0], [2.1]])
    with self.assertRaisesRegex(ValueError, "non-integer value"):
        to_int_label_array(X)

    X = np.array([[1, 0], [0, 1]])
    with self.assertRaisesRegex(ValueError, "1d np.array"):
        to_int_label_array(X, flatten_vector=True)
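
# For reference, a sketch of the behavior the test above exercises: cast an
# integral-valued array to int and optionally flatten it to 1d. This is an
# illustrative stand-in, not necessarily the library's exact implementation.
import numpy as np

def to_int_label_array(X: np.ndarray, flatten_vector: bool = True) -> np.ndarray:
    """Cast a label array to int, optionally flattening it to a 1d np.array."""
    if np.any(np.not_equal(np.mod(X, 1), 0)):
        raise ValueError("Input contains at least one non-integer value.")
    X = X.astype(int)
    if flatten_vector:
        X = X.squeeze()
        if X.ndim != 1:
            raise ValueError("Input could not be converted to a 1d np.array.")
    return X
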
def lf_empirical_accuracies(self, Y: np.ndarray) -> np.ndarray:
    """Compute empirical accuracy against a set of labels Y for each LF.

    Usually, Y represents development set labels.

    Parameters
    ----------
    Y
        [n] or [n, 1] np.ndarray of gold labels

    Returns
    -------
    numpy.ndarray
        Empirical accuracies for each LF
    """
    Y = to_int_label_array(Y)
    # Encode each vote as 0 (abstain), 1 (agrees with Y), or -1 (disagrees with Y)
    X = np.where(
        self.L == -1,
        0,
        np.where(self.L == np.vstack([Y] * self.L.shape[1]).T, 1, -1),
    )
    # Mean of the {-1, 1} votes over non-abstains, mapped to [0, 1] via 0.5 * (x + 1);
    # LFs that never vote produce NaN, which nan_to_num converts to 0
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.nan_to_num(0.5 * (X.sum(axis=0) / (self.L != -1).sum(axis=0) + 1))
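
# A tiny worked example for lf_empirical_accuracies, again assuming the
# snorkel.labeling.LFAnalysis wrapper around the label matrix self.L.
import numpy as np
from snorkel.labeling import LFAnalysis

L = np.array([[0, 1], [0, -1], [1, 1], [-1, 0]])  # -1 = abstain
Y = np.array([0, 0, 1, 1])

accs = LFAnalysis(L).lf_empirical_accuracies(Y)
# LF 0 votes on rows 0-2 and is correct on all three; LF 1 votes on rows 0, 2, 3
# and is correct only on row 2, so accs is approximately [1.0, 0.3333]
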
def metric_score(
    golds: Optional[np.ndarray] = None,
    preds: Optional[np.ndarray] = None,
    probs: Optional[np.ndarray] = None,
    metric: str = "accuracy",
    filter_dict: Optional[Dict[str, List[int]]] = None,
    **kwargs: Any,
) -> float:
    """Evaluate a standard metric on a set of predictions/probabilities.

    Parameters
    ----------
    golds
        An array of gold (int) labels
    preds
        An array of (int) predictions
    probs
        An [n_datapoints, n_classes] array of probabilistic (float) predictions
    metric
        The name of the metric to calculate
    filter_dict
        A mapping from label set name to the labels that should be filtered out
        for that label set

    Returns
    -------
    float
        The value of the requested metric

    Raises
    ------
    ValueError
        The requested metric is not currently supported
    ValueError
        The user attempted to calculate roc_auc score for a non-binary problem
    """
    if metric not in METRICS:
        msg = f"The metric you provided ({metric}) is not currently implemented."
        raise ValueError(msg)

    # Print helpful error messages if golds or preds has invalid shape or type
    golds = to_int_label_array(golds) if golds is not None else None
    preds = to_int_label_array(preds) if preds is not None else None

    # Optionally filter out examples (e.g., abstain predictions or unknown labels)
    label_dict: Dict[str, Optional[np.ndarray]] = {
        "golds": golds,
        "preds": preds,
        "probs": probs,
    }
    if filter_dict:
        if set(filter_dict.keys()).difference(set(label_dict.keys())):
            raise ValueError(
                "filter_dict must only include keys in ['golds', 'preds', 'probs']"
            )
        # label_dict is overwritten from type Dict[str, Optional[np.ndarray]]
        # to Dict[str, np.ndarray]
        label_dict = filter_labels(label_dict, filter_dict)  # type: ignore

    # Confirm that required label sets are available
    func, label_names = METRICS[metric]
    for label_name in label_names:
        if label_dict[label_name] is None:
            raise ValueError(f"Metric {metric} requires access to {label_name}.")

    label_sets = [label_dict[label_name] for label_name in label_names]
    return func(*label_sets, **kwargs)
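
# A typical metric_score call computes accuracy after filtering abstains (-1)
# out of the predictions via filter_dict; the import path is assumed to be
# snorkel.analysis, per the Snorkel library this code appears to come from.
import numpy as np
from snorkel.analysis import metric_score

golds = np.array([0, 1, 1, 0])
preds = np.array([0, 1, -1, 1])

acc = metric_score(golds=golds, preds=preds, metric="accuracy", filter_dict={"preds": [-1]})
# The abstained data point is dropped from both label sets; 2 of the 3 remaining
# predictions are correct, so acc is approximately 0.667
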