Example #1
import numpy as np
from sklearn.metrics import label_ranking_average_precision_score as lrap


def calculate_overall_lwlrap_sklearn(truth, scores):
    """Calculate the overall lwlrap using sklearn.metrics.lrap."""
    # sklearn doesn't correctly apply weighting to samples with no labels, so just skip them.
    sample_weight = np.sum(truth > 0, axis=1)
    nonzero_weight_sample_indices = np.flatnonzero(sample_weight > 0)
    overall_lwlrap = lrap(
        truth[nonzero_weight_sample_indices, :] > 0,
        scores[nonzero_weight_sample_indices, :],
        sample_weight=sample_weight[nonzero_weight_sample_indices])
    return overall_lwlrap
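A minimal usage sketch (not part of the original source), assuming `lrap` is `sklearn.metrics.label_ranking_average_precision_score` imported under that alias and that `truth` and `scores` are `(n_samples, n_classes)` arrays:

import numpy as np

# Three samples, four classes; the second sample has no positive labels,
# so calculate_overall_lwlrap_sklearn skips it entirely.
truth = np.array([[1, 0, 0, 1],
                  [0, 0, 0, 0],
                  [0, 1, 0, 0]])
scores = np.array([[0.9, 0.1, 0.2, 0.8],
                   [0.3, 0.4, 0.2, 0.1],
                   [0.1, 0.7, 0.6, 0.2]])

# Every positive label is ranked ahead of all negatives, so this prints 1.0.
print(calculate_overall_lwlrap_sklearn(truth, scores))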
Example #2
def update(self, y_true, y_pred):
    # Accumulate the batch LRAP weighted by the batch size so that a
    # per-item average can later be recovered as self._lrap / self._n_items.
    self._lrap += lrap(y_true, y_pred) * y_pred.shape[0]
    self._n_items += y_pred.shape[0]
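The `update` method reads like part of a running-average metric accumulator; the enclosing class is not shown in the source, so the following is a hypothetical reconstruction (the class name and the `result` method are assumptions):

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score as lrap


class LRAPMeter:
    """Hypothetical accumulator consistent with the update() snippet above."""

    def __init__(self):
        self._lrap = 0.0
        self._n_items = 0

    def update(self, y_true, y_pred):
        # Weight each batch by its size so result() is a per-item average.
        self._lrap += lrap(y_true, y_pred) * y_pred.shape[0]
        self._n_items += y_pred.shape[0]

    def result(self):
        return self._lrap / self._n_items if self._n_items else 0.0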
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
                             roc_auc_score, roc_curve)
from sklearn.metrics import label_ranking_average_precision_score as lrap


def get_model_stats(y_true,
                    model_outputs,
                    b_thres=.5,
                    q_thres=.5,
                    e_thres=.5,
                    plot_roc=True):
    """Gets the performance statistics of a model based on its outputs and the ground truth."""
    b_scores, q_scores, e_scores = (model_outputs[:, 0],
                                    model_outputs[:, 1],
                                    model_outputs[:, 2])
    b_true, q_true, e_true = y_true[:, 0], y_true[:, 1], y_true[:, 2]

    b_roc_auc = roc_auc_score(b_true, b_scores)
    q_roc_auc = roc_auc_score(q_true, q_scores)
    e_roc_auc = roc_auc_score(e_true, e_scores)

    b_fpr, b_tpr, _ = roc_curve(b_true, b_scores)
    q_fpr, q_tpr, _ = roc_curve(q_true, q_scores)
    e_fpr, e_tpr, _ = roc_curve(e_true, e_scores)

    if plot_roc:
        plt.figure()
        plt.title('ROC Curves \nfor Bugs, Questions and Enhancements')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.plot(b_fpr,
                 b_tpr,
                 color='orange',
                 label=f'Bug{" "*24}, AUC: {b_roc_auc:.3f}')
        plt.plot(q_fpr,
                 q_tpr,
                 color='blue',
                 label=f'Question{" "*16}, AUC: {q_roc_auc:.3f}')
        plt.plot(e_fpr,
                 e_tpr,
                 color='green',
                 label=f'Enhancement{" "*8}, AUC: {e_roc_auc:.3f}')
        plt.plot([0, 1], [0, 1],
                 color='red',
                 linestyle='--',
                 label=f'Random Guess{" "*6}, AUC: 0.5')
        plt.legend(loc="lower right")
        plt.show()

    b_preds = np.where(b_scores >= b_thres, 1, 0)
    q_preds = np.where(q_scores >= q_thres, 1, 0)
    e_preds = np.where(e_scores >= e_thres, 1, 0)

    b_accuracy = accuracy_score(b_true, b_preds)
    q_accuracy = accuracy_score(q_true, q_preds)
    e_accuracy = accuracy_score(e_true, e_preds)

    b_precision, b_recall, b_f1, _ = precision_recall_fscore_support(
        b_true, b_preds, average='binary')
    q_precision, q_recall, q_f1, _ = precision_recall_fscore_support(
        q_true, q_preds, average='binary')
    e_precision, e_recall, e_f1, _ = precision_recall_fscore_support(
        e_true, e_preds, average='binary')

    y_pred = np.concatenate((b_preds.reshape(-1, 1), q_preds.reshape(-1, 1),
                             e_preds.reshape(-1, 1)),
                            axis=1)

    # Exact-match (subset) accuracy: a sample counts only if all three labels are correct.
    exact_matches = 0
    for true, pred in zip(y_true, y_pred):
        if (true == pred).all():
            exact_matches += 1

    exact_accuracy = exact_matches / len(y_true)

    metrics_df = pd.DataFrame(
        [[b_accuracy, b_roc_auc, b_precision, b_recall, b_f1],
         [q_accuracy, q_roc_auc, q_precision, q_recall, q_f1],
         [e_accuracy, e_roc_auc, e_precision, e_recall, e_f1]],
        columns=['Accuracy', 'ROC-AUC', 'Precision', 'Recall', 'F1'],
        index=['Bug', 'Question', 'Enhancement'])

    lrap_score = lrap(y_true, model_outputs)

    return metrics_df, exact_accuracy, lrap_score
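A hedged usage sketch with synthetic data (not from the original source): `y_true` is an `(n_samples, 3)` binary label matrix and `model_outputs` holds the corresponding scores, both in the column order Bug, Question, Enhancement; `plot_roc=False` skips the matplotlib figure.

import numpy as np

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=(200, 3))     # Bug, Question, Enhancement labels
model_outputs = rng.random(size=(200, 3))      # model scores in [0, 1]

metrics_df, exact_accuracy, lrap_score = get_model_stats(
    y_true, model_outputs, plot_roc=False)

print(metrics_df)
print('Exact-match accuracy:', exact_accuracy)
print('LRAP:', lrap_score)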