Example #1
 def test_cmap(self):
     plot_calibration_curve(convert_labels_into_string(self.y),
                            [self.lr_probas, self.rf_probas],
                            cmap='Spectral')
     plot_calibration_curve(convert_labels_into_string(self.y),
                            [self.lr_probas, self.rf_probas],
                            cmap=plt.cm.Spectral)
Example #2
 def test_array_like(self):
     plot_calibration_curve(
         self.y, [self.lr_probas.tolist(),
                  self.rf_probas.tolist()])
     plot_calibration_curve(convert_labels_into_string(
         self.y), [self.lr_probas.tolist(),
                   self.rf_probas.tolist()])
Example #3
 def test_ax(self):
     plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas])
     fig, ax = plt.subplots(1, 1)
     out_ax = plot_calibration_curve(self.y,
                                     [self.lr_probas, self.rf_probas])
     assert ax is not out_ax
     out_ax = plot_calibration_curve(self.y,
                                     [self.lr_probas, self.rf_probas],
                                     ax=ax)
     assert ax is out_ax
Example #4
 def test_string_classes(self):
     plot_calibration_curve(convert_labels_into_string(self.y),
                            [self.lr_probas, self.rf_probas])
Example #5
 def test_plot_calibration(self):
     plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas])
Example #6
 def test_decision_function(self):
     plot_calibration_curve(self.y, [self.lr_probas,
                                     self.rf_probas,
                                     self.svc_scores])
Example #7
def eval_model_v2(
    context,
    xtest,
    ytest,
    model,
    pcurve_bins: int = 10,
    pcurve_names: List[str] = ["my classifier"],
    plots_artifact_path: str = "",
    pred_params: dict = {},
    cmap="Blues",
):
    """generate predictions and validation stats

    pred_params are non-default scikit-learn API prediction-function
    parameters. For example, a tree-based model may accept a tree-depth
    limit in its prediction function (see the usage sketch after this
    function).

    :param xtest:        features array type Union(DataItem, DataFrame,
                         numpy array)
    :param ytest:        ground-truth labels Union(DataItem, DataFrame,
                         Series, numpy array, List)
    :param model:        estimated model
    :param pcurve_bins:  (10) subdivide [0,1] interval into n bins, x-axis
    :param pcurve_names: label for each calibration curve
    :param pred_params:  (None) dict of predict function parameters
    :param cmap:         ('Blues') matplotlib color map
    """

    import numpy as np

    def df_blob(df):
        return bytes(df.to_csv(index=False), encoding="utf-8")

    if isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    elif isinstance(ytest, list):
        unique_labels = set(ytest)
    else:
        try:
            ytest = ytest.values
            unique_labels = np.unique(ytest)
        except Exception as exc:
            raise Exception(f"unrecognized data type for ytest {exc}")

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # INIT A COLLECTOR DICT FOR GENERATED PLOT/TABLE ARTIFACTS
    plots_path = plots_artifact_path or context.artifact_subpath("plots")
    extra_data = {}

    ypred = model.predict(xtest, **pred_params)
    context.log_results({
        "accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error": np.sum(ytest != ypred) / ytest.shape[0],
    })

    # PROBABILITIES
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
        if not is_multiclass:
            fraction_of_positives, mean_predicted_value = calibration_curve(
                ytest, yprob[:, -1], n_bins=pcurve_bins, strategy="uniform")
            cmd = plot_calibration_curve(ytest, [yprob], pcurve_names)
            calibration = context.log_artifact(
                PlotArtifact(
                    "probability-calibration",
                    body=cmd.get_figure(),
                    title="probability calibration plot",
                ),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["probability calibration"] = calibration

    # CONFUSION MATRIX
    cm = sklearn_confusion_matrix(ytest, ypred, normalize="all")
    df = pd.DataFrame(data=cm)
    extra_data["confusion matrix table.csv"] = df_blob(df)

    cmd = metrics.plot_confusion_matrix(
        model,
        xtest,
        ytest,
        normalize="all",
        values_format=".2g",
        cmap=plt.get_cmap(cmap),
    )
    confusion = context.log_artifact(
        PlotArtifact(
            "confusion-matrix",
            body=cmd.figure_,
            title="Confusion Matrix - Normalized Plot",
        ),
        artifact_path=plots_path,
        db_key=False,
    )
    extra_data["confusion matrix"] = confusion

    # LEARNING CURVES
    if hasattr(model, "evals_result"):
        results = model.evals_result()
        train_set = list(results.items())[0]
        valid_set = list(results.items())[1]

        learning_curves_df = None
        if is_multiclass:
            if "merror" in train_set[1]:
                learning_curves_df = pd.DataFrame({
                    "train_error": train_set[1]["merror"],
                    "valid_error": valid_set[1]["merror"],
                })
        else:
            if "error" in train_set[1]:
                learning_curves_df = pd.DataFrame({
                    "train_error": train_set[1]["error"],
                    "valid_error": valid_set[1]["error"],
                })

        if learning_curves_df is not None:
            extra_data["learning curve table.csv"] = df_blob(
                learning_curves_df)

            _, ax = plt.subplots()
            plt.xlabel("# training examples")
            plt.ylabel("error rate")
            plt.title("learning curve - error")
            ax.plot(learning_curves_df["train_error"], label="train")
            ax.plot(learning_curves_df["valid_error"], label="valid")
            learning = context.log_artifact(
                PlotArtifact("learning-curve",
                             body=plt.gcf(),
                             title="Learning Curve - erreur"),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["learning curve"] = learning

    # FEATURE IMPORTANCES
    if hasattr(model, "feature_importances_"):
        (fi_plot, fi_tbl) = feature_importances(model, xtest.columns)
        extra_data["feature importances"] = context.log_artifact(
            fi_plot, db_key=False, artifact_path=plots_path)
        extra_data["feature importances table.csv"] = df_blob(fi_tbl)

    # AUC - ROC - PR CURVES
    if is_multiclass:
        lb = LabelBinarizer()
        ytest_b = lb.fit_transform(ytest)

        extra_data["precision_recall_multi"] = context.log_artifact(
            precision_recall_multi(ytest_b, yprob, unique_labels),
            artifact_path=plots_path,
            db_key=False,
        )
        extra_data["roc_multi"] = context.log_artifact(
            roc_multi(ytest_b, yprob, unique_labels),
            artifact_path=plots_path,
            db_key=False,
        )

        # AUC multiclass
        aucmicro = metrics.roc_auc_score(ytest_b,
                                         yprob,
                                         multi_class="ovo",
                                         average="micro")
        aucweighted = metrics.roc_auc_score(ytest_b,
                                            yprob,
                                            multi_class="ovo",
                                            average="weighted")

        context.log_results({
            "auc-micro": aucmicro,
            "auc-weighted": aucweighted
        })

        # others (todo - macro, micro...)
        f1 = metrics.f1_score(ytest, ypred, average="macro")
        ps = metrics.precision_score(ytest, ypred, average="macro")
        rs = metrics.recall_score(ytest, ypred, average="macro")
        context.log_results({
            "f1-score": f1,
            "precision_score": ps,
            "recall_score": rs
        })

    else:
        yprob_pos = yprob[:, 1]
        extra_data["precision_recall_bin"] = context.log_artifact(
            precision_recall_bin(model, xtest, ytest, yprob_pos),
            artifact_path=plots_path,
            db_key=False,
        )
        extra_data["roc_bin"] = context.log_artifact(
            roc_bin(ytest, yprob_pos, clear=True),
            artifact_path=plots_path,
            db_key=False,
        )

        rocauc = metrics.roc_auc_score(ytest, yprob_pos)
        brier_score = metrics.brier_score_loss(ytest,
                                               yprob_pos,
                                               pos_label=ytest.max())
        f1 = metrics.f1_score(ytest, ypred)
        ps = metrics.precision_score(ytest, ypred)
        rs = metrics.recall_score(ytest, ypred)
        context.log_results({
            "rocauc": rocauc,
            "brier_score": brier_score,
            "f1-score": f1,
            "precision_score": ps,
            "recall_score": rs,
        })

    # return all model metrics and plots
    return extra_data
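
The pred_params hook described in the docstring is easiest to see from the caller's side. Below is a minimal usage sketch, not part of the original module: the handler name `evaluate`, the synthetic data, and the empty `pred_params` are assumptions; `context` stands for the MLRun execution context passed into the handler, and the module-level imports and helpers that `eval_model_v2` relies on (e.g. `feature_importances`, `roc_bin`) are assumed to be available as in the source repository.

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


def evaluate(context):
    # build a small synthetic binary problem (illustration only)
    X, y = make_classification(n_samples=500, n_features=8, random_state=0)
    xdf = pd.DataFrame(X, columns=[f"f{i}" for i in range(X.shape[1])])
    xtrain, xtest, ytrain, ytest = train_test_split(xdf, y, random_state=0)

    clf = RandomForestClassifier(n_estimators=100).fit(xtrain, ytrain)

    # pred_params is left empty here; for tree boosters it could carry
    # predict-time keyword arguments, as the docstring describes
    return eval_model_v2(context, xtest, ytest, clf,
                         pcurve_names=["random forest"],
                         pred_params={})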
Example #8
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from scikitplot.metrics import plot_calibration_curve

X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train,
                                                          y_train,
                                                          stratify=y_train,
                                                          random_state=0)

rf = RandomForestClassifier(n_estimators=100)
lr = LogisticRegression(max_iter=1000)  # assumed: `lr` is used below but was not defined in the snippet

rf_probas = rf.fit(X_train_sub, y_train_sub).predict_proba(X_test)
lr_probas = lr.fit(X_train_sub, y_train_sub).predict_proba(X_test)

probas_list = [rf_probas, lr_probas]
clf_names = ['Random Forest', 'Logistic Regression']

plot_calibration_curve(y_test, probas_list, clf_names, n_bins=4)

######## Sigmoid + Isotonic Regression #######

# `cv='prefit'` tells CalibratedClassifierCV to reuse the already-fitted `rf` model from above

cal_rf = CalibratedClassifierCV(rf, cv="prefit", method='sigmoid')
cal_rf.fit(X_val, y_val)
scores_sigm = cal_rf.predict_proba(X_test)

cal_rf_iso = CalibratedClassifierCV(rf, cv="prefit", method='isotonic')
cal_rf_iso.fit(X_val, y_val)
scores_iso = cal_rf_iso.predict_proba(X_test)

probas_list = [rf_probas, lr_probas, scores_sigm, scores_iso]
clf_names = ['Random Forest', 'Logistic Regression', 'Sigmoid', 'Isotonic']
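
The snippet stops after rebuilding `probas_list` and `clf_names`; presumably the next step is to redraw the reliability diagram with the sigmoid- and isotonic-calibrated scores alongside the raw ones, mirroring the earlier call:

plot_calibration_curve(y_test, probas_list, clf_names, n_bins=4)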
Example #9
def eval_class_model(context,
                     xtest,
                     ytest,
                     model,
                     plots_dest: str = "plots",
                     pred_params: dict = {}):
    """generate predictions and validation stats
    
    pred_params are non-default scikit-learn API prediction-function parameters.
    For example, a tree-based model may accept a tree-depth limit in its
    prediction function.

    :param xtest:        features array type Union(DataItem, DataFrame, numpy array)
    :param ytest:        ground-truth labels Union(DataItem, DataFrame, Series, numpy array, List)
    :param model:        estimated model
    :param pred_params:  (None) dict of predict function parameters
    """
    if isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    elif isinstance(ytest, list):
        unique_labels = set(ytest)
    else:
        try:
            ytest = ytest.values
            unique_labels = np.unique(ytest)
        except Exception as exc:
            raise Exception(f"unrecognized data type for ytest {exc}")

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # INIT COLLECTORS FOR PLOTS, TABLES, AND SCALAR METRICS
    mm_plots = []
    mm_tables = []
    mm = {}

    ypred = model.predict(xtest, **pred_params)
    mm.update({
        "test-accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error": np.sum(ytest != ypred) / ytest.shape[0]
    })

    # GEN PROBS (INCL CALIBRATED PROBABILITIES)
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
    else:
        # todo if decision fn...
        raise Exception("not implemented for this classifier")
    plot_calibration_curve(ytest, [yprob], ['xgboost'])
    context.log_artifact(PlotArtifact("calibration curve", body=plt.gcf()),
                         local_path=f"{plots_dest}/calibration curve.html")

    # start evaluating:
    # mm_plots.extend(learning_curves(model))
    if hasattr(model, "evals_result"):
        results = model.evals_result()
        train_set = list(results.items())[0]
        valid_set = list(results.items())[1]

        learning_curves = pd.DataFrame({
            "train_error": train_set[1]["error"],
            "train_auc": train_set[1]["auc"],
            "valid_error": valid_set[1]["error"],
            "valid_auc": valid_set[1]["auc"]
        })

        plt.clf()  #gcf_clear(plt)
        fig, ax = plt.subplots()
        plt.xlabel('# training examples')
        plt.ylabel('auc')
        plt.title('learning curve - auc')
        ax.plot(learning_curves.train_auc, label='train')
        ax.plot(learning_curves.valid_auc, label='valid')
        legend = ax.legend(loc='lower left')
        context.log_artifact(
            PlotArtifact("learning curve - auc", body=plt.gcf()),
            local_path=f"{plots_dest}/learning curve - auc.html")

        plt.clf()  #gcf_clear(plt)
        fig, ax = plt.subplots()
        plt.xlabel('# training examples')
        plt.ylabel('error rate')
        plt.title('learning curve - error')
        ax.plot(learning_curves.train_error, label='train')
        ax.plot(learning_curves.valid_error, label='valid')
        legend = ax.legend(loc='lower left')
        context.log_artifact(
            PlotArtifact("learning curve - erreur", body=plt.gcf()),
            local_path=f"{plots_dest}/learning curve - erreur.html")

    (fi_plot, fi_tbl) = feature_importances(model, xtest.columns)
    mm_plots.append(fi_plot)
    mm_tables.append(fi_tbl)

    mm_plots.append(confusion_matrix(model, xtest, ytest))

    if is_multiclass:
        lb = LabelBinarizer()
        ytest_b = lb.fit_transform(ytest)

        mm_plots.append(precision_recall_multi(ytest_b, yprob, unique_labels))
        mm_plots.append(roc_multi(ytest_b, yprob, unique_labels))

        # AUC multiclass
        mm.update({
            "auc-micro":
            metrics.roc_auc_score(ytest_b,
                                  yprob,
                                  multi_class="ovo",
                                  average="micro"),
            "auc-weighted":
            metrics.roc_auc_score(ytest_b,
                                  yprob,
                                  multi_class="ovo",
                                  average="weighted")
        })

        # others (todo - macro, micro...)
        mm.update({
            "f1-score":
            metrics.f1_score(ytest, ypred, average="micro"),
            "precision_score":
            metrics.precision_score(ytest, ypred, average="micro"),
            "recall_score":
            metrics.recall_score(ytest, ypred, average="micro")
        })

    else:
        # extract the positive label
        yprob_pos = yprob[:, 1]

        mm_plots.append(roc_bin(ytest, yprob_pos))
        mm_plots.append(precision_recall_bin(model, xtest, ytest, yprob_pos))

        mm.update({
            "rocauc":
            metrics.roc_auc_score(ytest, yprob_pos),
            "brier_score":
            metrics.brier_score_loss(ytest, yprob_pos, pos_label=ytest.max()),
            "f1-score":
            metrics.f1_score(ytest, ypred),
            "precision_score":
            metrics.precision_score(ytest, ypred),
            "recall_score":
            metrics.recall_score(ytest, ypred)
        })

    # return all model metrics and plots
    mm.update({"plots": mm_plots, "tables": mm_tables})

    return mm
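
A sketch of how a caller might consume what `eval_class_model` returns, assuming each entry in `mm["plots"]` is an mlrun `PlotArtifact` (whose first constructor argument is exposed as `.key`) and each entry in `mm["tables"]` is a pandas DataFrame; the loop bodies and artifact names are illustrative assumptions, not the library's own example.

# hypothetical caller-side handling of the returned collector dict (sketch only)
mm = eval_class_model(context, xtest, ytest, model)

for plot in mm["plots"]:
    # each plot was created as PlotArtifact("<name>", body=...), so .key holds that name
    context.log_artifact(plot, local_path=f"plots/{plot.key}.html")

for i, tbl in enumerate(mm["tables"]):
    # tables are assumed to be pandas DataFrames
    context.log_dataset(f"table_{i}", df=tbl, format="csv")

# the remaining entries are scalar metrics
context.log_results({k: v for k, v in mm.items() if k not in ("plots", "tables")})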