Beispiel #1
0
def multilabel_prediction_report(y_true, y_pred, scores=None, score_fmt='.3f'):
    """Render a per-sample table comparing true and predicted label vectors.

    Every row of the table holds a marker ('+' when the predicted row equals
    the true row in every position, '-' otherwise), the true label vector,
    the predicted label vector and, when ``scores`` is given, the formatted
    scores of that sample wrapped in square brackets.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``.
    y_pred : array-like
        Predicted labels; validated by ``check_multilabel_array``. Must have
        the same shape as ``y_true``.
    scores : object with a ``shape`` attribute, optional
        Per-sample scores of the same shape as ``y_true``. When omitted the
        scores column only contains '-'.
    score_fmt : str
        Format specification forwarded to ``_printable_scores``.

    Returns
    -------
    The table produced by ``tabulate`` (presumably a string).

    Raises
    ------
    ValueError
        If ``y_pred`` or ``scores`` do not have the same shape as ``y_true``.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')
    has_scores = scores is not None
    if has_scores and y_true.shape != scores.shape:
        raise ValueError('y_true and scores must have equal shapes')

    # Pre-format the scores so that they line up inside the table.
    formatted_scores = None
    if has_scores:
        formatted_scores = _printable_scores(scores, score_fmt=score_fmt)
        # Sanity check: one formatted entry per sample.
        assert len(formatted_scores) == len(y_true)

    # Assemble one table row per sample.
    rows = []
    for idx, (expected, predicted) in enumerate(zip(y_true, y_pred)):
        marker = '+' if np.all(expected == predicted) else '-'
        if formatted_scores is None:
            score_cell = '-'
        else:
            score_cell = ('[%s]' % formatted_scores[idx])
        rows.append([marker, str(expected), str(predicted), score_cell])

    column_titles = ['', 'label', 'prediction', 'scores']
    return tabulate(rows, headers=column_titles, floatfmt='.0f')
Beispiel #2
0
def multilabel_prediction_report(y_true, y_pred, scores=None, score_fmt='.3f'):
    """Build a table with one row per sample showing label vs. prediction.

    The first column marks a sample with '+' if its predicted label vector
    matches the true one element-wise, and with '-' otherwise. The remaining
    columns contain the stringified true labels, the stringified predicted
    labels and the bracketed, pre-formatted scores ('-' if no scores were
    supplied).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``.
    y_pred : array-like
        Predicted labels, passed through ``check_multilabel_array``; its
        shape has to equal the shape of ``y_true``.
    scores : object with a ``shape`` attribute, optional
        Scores belonging to the predictions; its shape has to equal the
        shape of ``y_true``.
    score_fmt : str
        Format specification handed to ``_printable_scores``.

    Returns
    -------
    Whatever ``tabulate`` returns for the generated rows (presumably a
    string).

    Raises
    ------
    ValueError
        On a shape mismatch between ``y_true`` and ``y_pred`` or between
        ``y_true`` and ``scores``.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')

    if scores is None:
        printable = None
    else:
        if y_true.shape != scores.shape:
            raise ValueError('y_true and scores must have equal shapes')
        # Format the scores up front so that they are aligned in the table.
        printable = _printable_scores(scores, score_fmt=score_fmt)
        # Sanity check: one formatted entry per sample.
        assert len(printable) == len(y_true)

    # One row per sample: marker, true labels, predicted labels, scores.
    table = [
        [
            '+' if np.all(y_true[idx] == pred) else '-',
            str(y_true[idx]),
            str(pred),
            '-' if printable is None else ('[%s]' % printable[idx]),
        ]
        for idx, pred in enumerate(y_pred)
    ]
    return tabulate(table,
                    headers=['', 'label', 'prediction', 'scores'],
                    floatfmt='.0f')
Beispiel #3
0
def multilabel_classification_report(y_true, y_pred, fmt='.3f', target_names=None):
    """Render a per-label table of classification metrics.

    For every label column the table lists precision, recall, f1-score,
    accuracy, support and the TP/TN/FP/FN counts. A final 'avg / total' row
    holds the unweighted average of the four ratio metrics and the sum of
    the five count columns.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``. The size of
        its second axis is used as the number of labels.
    y_pred : array-like
        Predicted labels with the same shape as ``y_true``.
    fmt : str
        Float format forwarded to ``tabulate`` as ``floatfmt``.
    target_names : sequence, optional
        Display name for each label. Defaults to the label index as string.

    Returns
    -------
    The table produced by ``tabulate`` (presumably a string).

    Raises
    ------
    ValueError
        If the shapes differ or ``target_names`` does not contain exactly one
        name per label.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')
    n_labels = y_true.shape[1]
    if target_names is not None and len(target_names) != n_labels:
        raise ValueError('target_names must specify a name for all %d labels' % n_labels)

    # Gather the per-label statistics.
    precision, recall, f1_score, support = precision_recall_fscore_support(y_true, y_pred)
    tp, fp, tn, fn = multilabel_tp_fp_tn_fn_scores(y_true, y_pred)
    accuracy = multilabel_accuracy(y_true, y_pred)

    # Columns that are averaged vs. columns that are summed in the last row;
    # their order matches the table headers.
    averaged_columns = [precision, recall, f1_score, accuracy]
    summed_columns = [support, tp, tn, fp, fn]
    all_columns = averaged_columns + summed_columns

    # One row per label.
    rows = []
    for label_idx in range(n_labels):
        if target_names is None:
            name = str(label_idx)
        else:
            name = target_names[label_idx]
        rows.append([name] + [column[label_idx] for column in all_columns])

    # Closing summary row.
    totals = ['avg / total']
    totals.extend(np.average(column) for column in averaged_columns)
    totals.extend(np.sum(column) for column in summed_columns)
    rows.append(totals)

    column_titles = ['', 'precision', 'recall', 'f1-score', 'accuracy', 'support', 'TP', 'TN', 'FP', 'FN']
    return tabulate(rows, headers=column_titles, floatfmt=fmt)
Beispiel #4
0
def multilabel_accuracy(y_true, y_pred):
    """Compute the accuracy separately for every label column.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``. The size of
        its second axis is used as the number of labels.
    y_pred : array-like
        Predicted labels with the same shape as ``y_true``.

    Returns
    -------
    numpy.ndarray
        One-dimensional float array holding one ``accuracy_score`` result
        per label column.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` have different shapes.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    n_labels = y_true.shape[1]
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')

    # Score each label column on its own.
    per_label = [
        accuracy_score(y_true[:, col], y_pred[:, col])
        for col in range(n_labels)
    ]
    return np.array(per_label, dtype=float)
Beispiel #5
0
def multilabel_accuracy(y_true, y_pred):
    """Return the per-label accuracy of a multi-label prediction.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``. Its
        second axis enumerates the labels.
    y_pred : array-like
        Predicted labels, passed through ``check_multilabel_array``; must
        have the same shape as ``y_true``.

    Returns
    -------
    numpy.ndarray
        Float array of length ``y_true.shape[1]``; entry ``i`` is the result
        of ``accuracy_score`` for label column ``i``.

    Raises
    ------
    ValueError
        If the two arrays differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    n_labels = y_true.shape[1]
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')

    result = np.zeros(n_labels)
    col = 0
    while col < n_labels:
        # Each label is evaluated independently of all others.
        true_column = y_true[:, col]
        pred_column = y_pred[:, col]
        result[col] = accuracy_score(true_column, pred_column)
        col += 1
    return result
Beispiel #6
0
def multilabel_classification_report(y_true,
                                     y_pred,
                                     fmt='.3f',
                                     target_names=None):
    """Build a text table with classification metrics for every label.

    Each label gets one row containing precision, recall, f1-score,
    accuracy, support, TP, TN, FP and FN. The last row ('avg / total')
    contains the plain average of the first four metrics and the sum of the
    remaining five.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``. Its
        second axis enumerates the labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.
    fmt : str
        Float format passed to ``tabulate`` via ``floatfmt``.
    target_names : sequence, optional
        One display name per label; the label index is used when omitted.

    Returns
    -------
    Whatever ``tabulate`` returns for the generated rows (presumably a
    string).

    Raises
    ------
    ValueError
        On a shape mismatch or when the number of ``target_names`` differs
        from the number of labels.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')
    n_labels = y_true.shape[1]
    if target_names is not None and len(target_names) != n_labels:
        raise ValueError('target_names must specify a name for all %d labels' %
                         n_labels)

    # Per-label statistics.
    precision, recall, f1_score, support = precision_recall_fscore_support(
        y_true, y_pred)
    tp, fp, tn, fn = multilabel_tp_fp_tn_fn_scores(y_true, y_pred)
    accuracy = multilabel_accuracy(y_true, y_pred)

    # Ratio metrics are averaged, counts are summed in the summary row.
    # The tuple order mirrors the header order below.
    ratios = (precision, recall, f1_score, accuracy)
    counts = (support, tp, tn, fp, fn)

    def _row_for(label_idx):
        # First cell is the label's display name, then its metric values.
        if target_names is None:
            first_cell = str(label_idx)
        else:
            first_cell = target_names[label_idx]
        return [first_cell] + [values[label_idx] for values in ratios + counts]

    table = [_row_for(label_idx) for label_idx in range(n_labels)]
    table.append(['avg / total'] + [np.average(values) for values in ratios] +
                 [np.sum(values) for values in counts])

    headers = [
        '', 'precision', 'recall', 'f1-score', 'accuracy', 'support', 'TP',
        'TN', 'FP', 'FN'
    ]
    return tabulate(table, headers=headers, floatfmt=fmt)
Beispiel #7
0
def multilabel_tp_fp_tn_fn_scores(y_true, y_pred):
    """Count true/false positives and negatives for every label column.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``. The size of
        its second axis is used as the number of labels.
    y_pred : array-like
        Predicted labels with the same shape as ``y_true``.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(tp, fp, tn, fn)``; each is an integer array with one entry per
        label.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` have different shapes.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    n_labels = y_true.shape[1]
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')

    # With labels=[1, 0] each 2x2 confusion matrix reads [[tp, fn], [fp, tn]],
    # so its flattened form is ordered tp, fn, fp, tn. Column i of ``counts``
    # stores that flattened matrix for label i.
    counts = np.zeros((4, n_labels), dtype=int)
    for col in range(n_labels):
        per_label_matrix = confusion_matrix(y_true[:, col], y_pred[:, col], labels=[1, 0])
        counts[:, col] = per_label_matrix.ravel()

    # Rows follow the flattened order; the return order is tp, fp, tn, fn.
    tp, fn, fp, tn = counts
    return tp, fp, tn, fn
 def fit(self, X, y):
     """Fit a one-vs-rest wrapped model on the given training data.

     ``X`` is validated with ``check_feature_array`` and ``y`` with
     ``check_multilabel_array``; both must end up with identical shapes.
     On success the number of columns of ``X`` is stored in
     ``n_features_`` and the fitted ``OneVsRestClassifier`` (built around
     ``self._init_model()`` with ``self.n_jobs`` jobs) in ``model_``.

     Raises
     ------
     ValueError
         If ``X`` and ``y`` have different shapes.
     """
     X = check_feature_array(X)
     y = check_multilabel_array(y)
     x_shape, y_shape = X.shape, y.shape
     if x_shape != y_shape:
         raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (x_shape, y_shape))
     self.n_features_ = x_shape[1]
     # Wrap the base model so that one estimator is trained per label.
     wrapped = OneVsRestClassifier(self._init_model(), n_jobs=self.n_jobs)
     self.model_ = wrapped.fit(X, y)
Beispiel #9
0
def multilabel_tp_fp_tn_fn_scores(y_true, y_pred):
    """Return per-label TP, FP, TN and FN counts of a multi-label prediction.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``. Its
        second axis enumerates the labels.
    y_pred : array-like
        Predicted labels, passed through ``check_multilabel_array``; must
        have the same shape as ``y_true``.

    Returns
    -------
    tuple of four numpy.ndarray
        Integer arrays ``(tp, fp, tn, fn)`` of length ``y_true.shape[1]``.

    Raises
    ------
    ValueError
        If the two arrays differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    n_labels = y_true.shape[1]
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')

    # Row layout of ``flattened``. It mirrors the flattened confusion matrix
    # [[tp, fn], [fp, tn]] that results from requesting labels=[1, 0].
    row_of = {'tp': 0, 'fn': 1, 'fp': 2, 'tn': 3}
    flattened = np.zeros((len(row_of), n_labels), dtype=int)
    for label_idx in range(n_labels):
        true_column = y_true[:, label_idx]
        pred_column = y_pred[:, label_idx]
        flattened[:, label_idx] = confusion_matrix(
            true_column, pred_column, labels=[1, 0]).ravel()

    return (flattened[row_of['tp']], flattened[row_of['fp']],
            flattened[row_of['tn']], flattened[row_of['fn']])
    def fit(self, X, y):
        """Train the underlying model on the given data.

        Parameters
        ----------
        X : array-like
            Training vectors; validated by ``check_feature_array``.
        y : array-like
            Target values; validated by ``check_multilabel_array``. Must have
            exactly the same shape as ``X``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` have different shapes.

        Notes
        -----
        Stores the number of columns of ``X`` in ``n_features_`` and the
        result of fitting ``self._init_model()`` in ``model_``.
        """
        X = check_feature_array(X)
        y = check_multilabel_array(y)
        x_shape, y_shape = X.shape, y.shape
        if x_shape != y_shape:
            raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (x_shape, y_shape))
        self.n_features_ = x_shape[1]
        fresh_model = self._init_model()
        self.model_ = fresh_model.fit(X, y)
Beispiel #11
0
def multilabel_loglikelihood_report(y_true, loglikelihoods, fmt='.0f'):
    """Render a table pairing each sample's labels with its loglikelihoods.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``.
    loglikelihoods : array-like
        Loglikelihood values; validated by ``check_feature_array``. Must
        have the same shape as ``y_true``.
    fmt : str
        Format specification forwarded to ``_printable_scores`` for the
        loglikelihood values.

    Returns
    -------
    The table produced by ``tabulate`` (presumably a string).

    Raises
    ------
    ValueError
        If ``y_true`` and ``loglikelihoods`` have different shapes.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')

    # Pre-format the values so that they are aligned inside the table.
    formatted = _printable_scores(loglikelihoods, score_fmt=fmt)
    # Sanity check: one formatted entry per sample.
    assert len(formatted) == len(loglikelihoods)

    # One row per sample: stringified labels and bracketed loglikelihoods.
    rows = [
        [str(label), '[' + formatted[idx] + ']']
        for idx, label in enumerate(y_true)
    ]
    return tabulate(rows, headers=['label', 'loglikelihoods'], floatfmt='.0f')
Beispiel #12
0
def multilabel_loglikelihood_report(y_true, loglikelihoods, fmt='.0f'):
    """Build a two-column table: true labels next to their loglikelihoods.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``.
    loglikelihoods : array-like
        Loglikelihood values, passed through ``check_feature_array``; must
        have the same shape as ``y_true``.
    fmt : str
        Format specification handed to ``_printable_scores``.

    Returns
    -------
    Whatever ``tabulate`` returns for the generated rows (presumably a
    string).

    Raises
    ------
    ValueError
        On a shape mismatch between ``y_true`` and ``loglikelihoods``.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')

    # Format the loglikelihoods up front so that they are properly aligned.
    printable = _printable_scores(loglikelihoods, score_fmt=fmt)
    # Sanity check: one formatted entry per sample.
    assert len(printable) == len(loglikelihoods)

    table = []
    idx = 0
    for label in y_true:
        bracketed = '[' + printable[idx] + ']'
        table.append([str(label), bracketed])
        idx += 1

    column_titles = ['label', 'loglikelihoods']
    return tabulate(table, headers=column_titles, floatfmt='.0f')
Beispiel #13
0
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Summarize the loglikelihoods of positive and negative samples per label.

    For every label column the loglikelihoods are split into those of
    samples where the label is 1 and those where it is 0; mean and standard
    deviation are computed for both groups.

    Parameters
    ----------
    y_true : array-like
        True labels; validated by ``check_multilabel_array``. The size of
        its second axis is used as the number of labels.
    loglikelihoods : array-like
        Loglikelihood values; validated by ``check_feature_array``. Must
        have the same shape as ``y_true``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_labels, 4)`` whose columns are, in order:
        mean of positives, std of positives, mean of negatives, std of
        negatives.

    Raises
    ------
    ValueError
        If ``y_true`` and ``loglikelihoods`` have different shapes.

    Notes
    -----
    If a label has no positive (or no negative) samples, the statistics of
    that group are computed over an empty selection; NumPy is expected to
    yield NaN (with a warning) in that case.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    # Calculate stats
    summary = np.zeros((n_labels, 4))
    # ``range`` instead of ``xrange``: the latter only exists on Python 2 and
    # raises a NameError on Python 3.
    for label_idx in range(n_labels):
        label_column = y_true[:, label_idx]
        values = loglikelihoods[:, label_idx]
        pos_values = values[np.where(label_column == 1)[0]]
        neg_values = values[np.where(label_column == 0)[0]]

        summary[label_idx][0] = np.average(pos_values)
        summary[label_idx][1] = np.std(pos_values)
        summary[label_idx][2] = np.average(neg_values)
        summary[label_idx][3] = np.std(neg_values)
    return summary
Beispiel #14
0
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Compute per-label loglikelihood statistics for positives and negatives.

    The loglikelihoods of each label column are grouped by the true label
    value (1 = positive, 0 = negative). For both groups the mean and the
    standard deviation are reported.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed through ``check_multilabel_array``. Its
        second axis enumerates the labels.
    loglikelihoods : array-like
        Loglikelihood values, passed through ``check_feature_array``; must
        have the same shape as ``y_true``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_labels, 4)``. Column order: positive mean,
        positive std, negative mean, negative std.

    Raises
    ------
    ValueError
        On a shape mismatch between ``y_true`` and ``loglikelihoods``.

    Notes
    -----
    A label without any positive (or negative) sample leads to statistics
    over an empty selection; NumPy is expected to yield NaN (with a
    warning) for those entries.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    # Calculate stats
    summary = np.zeros((n_labels, 4))
    # ``xrange`` is Python 2 only (NameError on Python 3); ``range`` works on
    # both.
    for label_idx in range(n_labels):
        pos_indexes = np.where(y_true[:, label_idx] == 1)[0]
        neg_indexes = np.where(y_true[:, label_idx] == 0)[0]

        # Slice the label's column once and reuse it for both groups.
        column = loglikelihoods[:, label_idx]
        positives = column[pos_indexes]
        negatives = column[neg_indexes]

        summary[label_idx][0] = np.average(positives)
        summary[label_idx][1] = np.std(positives)
        summary[label_idx][2] = np.average(negatives)
        summary[label_idx][3] = np.std(negatives)
    return summary