Example #1
def confusion_matrix(
    gold,
    pred,
    null_pred=False,
    null_gold=False,
    normalize=False,
    pretty_print=True,
):
    """A shortcut method for building a confusion matrix all at once.

    Args:
        gold: an array-like of gold labels (ints)
        pred: an array-like of predictions (ints)
        null_pred: If True, include the row corresponding to null predictions
        null_gold: If True, include the col corresponding to null gold labels
        normalize: if True, divide counts by the total number of items
        pretty_print: if True, pretty-print the matrix before returning
    """
    conf = ConfusionMatrix(null_pred=null_pred, null_gold=null_gold)
    gold = arraylike_to_numpy(gold)
    pred = arraylike_to_numpy(pred)
    conf.add(gold, pred)
    mat = conf.compile()

    if normalize:
        mat = mat / len(gold)

    if pretty_print:
        conf.display(normalize=normalize)

    return mat
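
A minimal usage sketch, assuming the function above is in scope and that label 0 encodes a null/abstain prediction (suggested by the null_pred/null_gold flags):

gold = [1, 1, 2, 2, 1]
pred = [1, 2, 2, 2, 0]

# Raw counts, pretty-printed and returned
mat = confusion_matrix(gold, pred, null_pred=True)

# Normalized fractions, without printing
frac = confusion_matrix(gold, pred, normalize=True, pretty_print=False)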
Example #2
def roc_auc_score(gold, probs, ignore_in_gold=[], ignore_in_pred=[]):
    """Compute the ROC AUC score, given the gold labels and predicted probs.

    Args:
        gold: A 1d array-like of gold labels
        probs: A 2d array-like of predicted probabilities
        ignore_in_gold: A list of gold labels; elements with any of these
            gold labels will be ignored.
        ignore_in_pred: Not supported for the ROC-AUC score; must be left
            empty (a non-empty list raises a ValueError).

    Returns:
        roc_auc_score: The (float) roc_auc score
    """
    gold = arraylike_to_numpy(gold)

    # Filter out the ignore_in_gold (but not ignore_in_pred)
    # Note the current sub-functions (below) do not handle this...
    if len(ignore_in_pred) > 0:
        raise ValueError("ignore_in_pred not defined for ROC-AUC score.")
    keep = [x not in ignore_in_gold for x in gold]
    gold = gold[keep]
    probs = probs[keep, :]

    # Convert gold to one-hot indicator format, using the k inferred from probs
    gold_s = hard_to_soft(torch.from_numpy(gold), k=probs.shape[1]).numpy()
    return skm.roc_auc_score(gold_s, probs)
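
A usage sketch, assuming probs has one column per class (the code infers k from probs.shape[1]) with columns ordered by label 1..k:

import numpy as np

gold = [1, 2, 2, 1]
probs = np.array([
    [0.9, 0.1],
    [0.2, 0.8],
    [0.4, 0.6],
    [0.7, 0.3],
])  # rows are items, columns are classes 1 and 2

score = roc_auc_score(gold, probs)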
Example #3
def single_lf_summary(Y_p, Y=None):
    """Calculates coverage, overlap, conflicts, and accuracy for a single LF

    Args:
        Y_p: a np.array or torch.Tensor of predicted labels
        Y: a np.array or torch.Tensor of true labels (if known)
    """
    L = sparse.csr_matrix(arraylike_to_numpy(Y_p).reshape(-1, 1))
    return lf_summary(L, Y)
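
A usage sketch, assuming label 0 means the LF abstained (which is why the votes are stored sparsely) and that lf_summary is available alongside the function above:

import numpy as np

Y_p = np.array([1, 0, 2, 1, 0, 2])  # one LF's votes; 0 = abstain
Y = np.array([1, 2, 2, 1, 1, 2])    # gold labels, if available

stats = single_lf_summary(Y_p, Y=Y)      # coverage, overlap, conflicts, accuracy
stats_no_gold = single_lf_summary(Y_p)   # without Y, accuracy cannot be computed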
Example #4
def lf_empirical_accuracies(L, Y):
    """Return the **empirical accuracy** against a set of labels Y (e.g. dev
    set) for each LF.
    Args:
        L: an n x m scipy.sparse matrix where L_{i,j} is the label given by the
            jth LF to the ith candidate
        Y: an [n] or [n, 1] np.ndarray of gold labels
    """
    # Assume labeled set is small, work with dense matrices
    Y = arraylike_to_numpy(Y)
    L = L.toarray()
    # X[i, j] = 0 if LF j abstained on item i (label 0), +1 if its label
    # matches Y[i], and -1 otherwise
    X = np.where(L == 0, 0, np.where(L == np.vstack([Y] * L.shape[1]).T, 1, -1))
    # Per-LF accuracy = fraction of non-abstain labels that match Y
    return 0.5 * (X.sum(axis=0) / (L != 0).sum(axis=0) + 1)
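
A small worked sketch, assuming 0 encodes an abstain so each LF's accuracy is computed only over the items it actually labeled:

import numpy as np
from scipy import sparse

# 4 items, 2 labeling functions; 0 = abstain
L = sparse.csr_matrix(np.array([
    [1, 0],
    [2, 2],
    [0, 1],
    [1, 2],
]))
Y = np.array([1, 2, 2, 2])

accs = lf_empirical_accuracies(L, Y)
# LF 1 labels items 1, 2, 4 and is correct on items 1 and 2 -> 2/3
# LF 2 labels items 2, 3, 4 and is correct on items 2 and 4 -> 2/3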
Example #5
def error_buckets(gold, pred, X=None):
    """Group items by error buckets

    Args:
        gold: an array-like of gold labels (ints)
        pred: an array-like of predictions (ints)
        X: an iterable of items
    Returns:
        buckets: A dict of items where buckets[i,j] is a list of items with
            predicted label i and true label j. If X is None, return indices
            instead.

    For a binary problem with (1=positive, 2=negative):
        buckets[1,1] = true positives
        buckets[1,2] = false positives
        buckets[2,1] = false negatives
        buckets[2,2] = true negatives
    """
    buckets = defaultdict(list)
    gold = arraylike_to_numpy(gold)
    pred = arraylike_to_numpy(pred)
    for i, (p, g) in enumerate(zip(pred, gold)):
        buckets[p, g].append(X[i] if X is not None else i)
    return buckets
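
A usage sketch in the binary convention from the docstring (1 = positive, 2 = negative), passing the raw items via X so the buckets contain items rather than indices:

gold = [1, 1, 2, 2]
pred = [1, 2, 2, 1]
items = ["a", "b", "c", "d"]

buckets = error_buckets(gold, pred, X=items)
# buckets[1, 1] -> ["a"]  (true positive)
# buckets[2, 1] -> ["b"]  (false negative: predicted 2, gold 1)
# buckets[2, 2] -> ["c"]  (true negative)
# buckets[1, 2] -> ["d"]  (false positive: predicted 1, gold 2)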
Example #6
def _preprocess(gold, pred, ignore_in_gold, ignore_in_pred):
    """Convert gold and pred to np.ndarrays and drop any ignored labels."""
    gold = arraylike_to_numpy(gold)
    pred = arraylike_to_numpy(pred)
    if ignore_in_gold or ignore_in_pred:
        gold, pred = _drop_ignored(gold, pred, ignore_in_gold, ignore_in_pred)
    return gold, pred
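
A sketch of how a metric wrapper might use this helper; accuracy_score here is a hypothetical example, and _drop_ignored is assumed to filter the listed labels out of both arrays (it is not shown above):

def accuracy_score(gold, pred, ignore_in_gold=[], ignore_in_pred=[]):
    # Hypothetical metric built on the shared preprocessing step
    gold, pred = _preprocess(gold, pred, ignore_in_gold, ignore_in_pred)
    return float((gold == pred).mean()) if len(gold) else 0.0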