def test_cmap(self):
    plot_calibration_curve(convert_labels_into_string(self.y),
                           [self.lr_probas, self.rf_probas],
                           cmap='Spectral')
    plot_calibration_curve(convert_labels_into_string(self.y),
                           [self.lr_probas, self.rf_probas],
                           cmap=plt.cm.Spectral)


def test_array_like(self):
    plot_calibration_curve(
        self.y, [self.lr_probas.tolist(), self.rf_probas.tolist()])
    plot_calibration_curve(
        convert_labels_into_string(self.y),
        [self.lr_probas.tolist(), self.rf_probas.tolist()])


def test_ax(self):
    plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas])
    fig, ax = plt.subplots(1, 1)
    out_ax = plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas])
    assert ax is not out_ax
    out_ax = plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas],
                                    ax=ax)
    assert ax is out_ax


def test_string_classes(self):
    plot_calibration_curve(convert_labels_into_string(self.y),
                           [self.lr_probas, self.rf_probas])


def test_plot_calibration(self):
    plot_calibration_curve(self.y, [self.lr_probas, self.rf_probas])


def test_decision_function(self):
    plot_calibration_curve(self.y,
                           [self.lr_probas, self.rf_probas, self.svc_scores])
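# The tests above assume a unittest fixture that provides self.y,
# self.lr_probas, self.rf_probas and self.svc_scores, plus a
# convert_labels_into_string helper. A minimal sketch of such a fixture
# follows; the dataset choice and the helper body are assumptions, not
# taken from the original suite.
import unittest

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from scikitplot.metrics import plot_calibration_curve


def convert_labels_into_string(y):
    # hypothetical helper: replace the 0 label with a string so the plot
    # function is exercised with string class labels
    return ["A" if label == 0 else label for label in y]


class TestPlotCalibrationCurve(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.lr_probas = LogisticRegression(max_iter=1000).fit(
            self.X, self.y).predict_proba(self.X)
        self.rf_probas = RandomForestClassifier(n_estimators=10).fit(
            self.X, self.y).predict_proba(self.X)
        # raw decision_function scores, used by test_decision_function
        self.svc_scores = LinearSVC().fit(
            self.X, self.y).decision_function(self.X)

    def tearDown(self):
        plt.close("all")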
def eval_model_v2(
    context,
    xtest,
    ytest,
    model,
    pcurve_bins: int = 10,
    pcurve_names: List[str] = None,  # default ["my classifier"], set below
    plots_artifact_path: str = "",
    pred_params: dict = None,
    cmap="Blues",
):
    """generate predictions and validation stats

    pred_params are non-default, scikit-learn api prediction-function
    parameters. For example, a tree-type model may have a tree depth
    limit for its prediction function.

    :param xtest:        features array type Union(DataItem, DataFrame,
                         numpy array)
    :param ytest:        ground-truth labels Union(DataItem, DataFrame,
                         Series, numpy array, List)
    :param model:        estimated model
    :param pcurve_bins:  (10) subdivide [0,1] interval into n bins, x-axis
    :param pcurve_names: label for each calibration curve
    :param pred_params:  (None) dict of predict function parameters
    :param cmap:         ('Blues') matplotlib color map
    """
    import numpy as np

    # avoid mutable default arguments
    if pcurve_names is None:
        pcurve_names = ["my classifier"]
    if pred_params is None:
        pred_params = {}

    def df_blob(df):
        return bytes(df.to_csv(index=False), encoding="utf-8")

    if isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    elif isinstance(ytest, list):
        unique_labels = set(ytest)
    else:
        try:
            ytest = ytest.values
            unique_labels = np.unique(ytest)
        except Exception as exc:
            raise Exception(f"unrecognized data type for ytest {exc}")

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # INIT DICT...OR SOME OTHER COLLECTOR THAT CAN BE ACCESSED
    plots_path = plots_artifact_path or context.artifact_subpath("plots")
    extra_data = {}

    ypred = model.predict(xtest, **pred_params)
    context.log_results({
        "accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error": np.sum(ytest != ypred) / ytest.shape[0],
    })

    # PROBABILITIES
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
        if not is_multiclass:
            fraction_of_positives, mean_predicted_value = calibration_curve(
                ytest, yprob[:, -1], n_bins=pcurve_bins, strategy="uniform")
            cmd = plot_calibration_curve(ytest, [yprob], pcurve_names)
            calibration = context.log_artifact(
                PlotArtifact(
                    "probability-calibration",
                    body=cmd.get_figure(),
                    title="probability calibration plot",
                ),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["probability calibration"] = calibration

    # CONFUSION MATRIX
    cm = sklearn_confusion_matrix(ytest, ypred, normalize="all")
    df = pd.DataFrame(data=cm)
    extra_data["confusion matrix table.csv"] = df_blob(df)

    cmd = metrics.plot_confusion_matrix(
        model,
        xtest,
        ytest,
        normalize="all",
        values_format=".2g",
        cmap=plt.get_cmap(cmap),
    )
    confusion = context.log_artifact(
        PlotArtifact(
            "confusion-matrix",
            body=cmd.figure_,
            title="Confusion Matrix - Normalized Plot",
        ),
        artifact_path=plots_path,
        db_key=False,
    )
    extra_data["confusion matrix"] = confusion

    # LEARNING CURVES
    if hasattr(model, "evals_result"):
        results = model.evals_result()
        train_set = list(results.items())[0]
        valid_set = list(results.items())[1]

        learning_curves_df = None
        if is_multiclass:
            # the eval results are dicts, so test for the metric key
            # (hasattr on a dict is always False)
            if "merror" in train_set[1]:
                learning_curves_df = pd.DataFrame({
                    "train_error": train_set[1]["merror"],
                    "valid_error": valid_set[1]["merror"],
                })
        else:
            if "error" in train_set[1]:
                learning_curves_df = pd.DataFrame({
                    "train_error": train_set[1]["error"],
                    "valid_error": valid_set[1]["error"],
                })

        # truth-testing a DataFrame raises ValueError, so compare to None
        if learning_curves_df is not None:
            extra_data["learning curve table.csv"] = df_blob(
                learning_curves_df)

            _, ax = plt.subplots()
            plt.xlabel("# training examples")
            plt.ylabel("error rate")
            plt.title("learning curve - error")
            ax.plot(learning_curves_df["train_error"], label="train")
            ax.plot(learning_curves_df["valid_error"], label="valid")
            learning = context.log_artifact(
                PlotArtifact("learning-curve",
                             body=plt.gcf(),
                             title="Learning Curve - error"),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["learning curve"] = learning

    # FEATURE IMPORTANCES
    if hasattr(model, "feature_importances_"):
        (fi_plot, fi_tbl) = feature_importances(model, xtest.columns)
        extra_data["feature importances"] = context.log_artifact(
            fi_plot, db_key=False, artifact_path=plots_path)
        extra_data["feature importances table.csv"] = df_blob(fi_tbl)

    # AUC - ROC - PR CURVES
    # NOTE: this section assumes predict_proba was available above (yprob)
    if is_multiclass:
        lb = LabelBinarizer()
        ytest_b = lb.fit_transform(ytest)

        extra_data["precision_recall_multi"] = context.log_artifact(
            precision_recall_multi(ytest_b, yprob, unique_labels),
            artifact_path=plots_path,
            db_key=False,
        )
        extra_data["roc_multi"] = context.log_artifact(
            roc_multi(ytest_b, yprob, unique_labels),
            artifact_path=plots_path,
            db_key=False,
        )

        # AUC multiclass
        aucmicro = metrics.roc_auc_score(ytest_b, yprob,
                                         multi_class="ovo", average="micro")
        aucweighted = metrics.roc_auc_score(ytest_b, yprob,
                                            multi_class="ovo",
                                            average="weighted")
        context.log_results({
            "auc-micro": aucmicro,
            "auc-weighted": aucweighted,
        })

        # others (todo - macro, micro...)
        f1 = metrics.f1_score(ytest, ypred, average="macro")
        ps = metrics.precision_score(ytest, ypred, average="macro")
        rs = metrics.recall_score(ytest, ypred, average="macro")
        context.log_results({
            "f1-score": f1,
            "precision_score": ps,
            "recall_score": rs,
        })
    else:
        yprob_pos = yprob[:, 1]

        extra_data["precision_recall_bin"] = context.log_artifact(
            precision_recall_bin(model, xtest, ytest, yprob_pos),
            artifact_path=plots_path,
            db_key=False,
        )
        extra_data["roc_bin"] = context.log_artifact(
            roc_bin(ytest, yprob_pos, clear=True),
            artifact_path=plots_path,
            db_key=False,
        )

        rocauc = metrics.roc_auc_score(ytest, yprob_pos)
        brier_score = metrics.brier_score_loss(ytest, yprob_pos,
                                               pos_label=ytest.max())
        f1 = metrics.f1_score(ytest, ypred)
        ps = metrics.precision_score(ytest, ypred)
        rs = metrics.recall_score(ytest, ypred)
        context.log_results({
            "rocauc": rocauc,
            "brier_score": brier_score,
            "f1-score": f1,
            "precision_score": ps,
            "recall_score": rs,
        })

    # return all model metrics and plots
    return extra_data
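# A minimal usage sketch for eval_model_v2, assuming an MLRun job handler.
# The dataset, model and handler wiring below are illustrative assumptions;
# only eval_model_v2 itself comes from the code above. Note that
# metrics.plot_confusion_matrix (used inside) requires an older
# scikit-learn, since it was removed in 1.2 in favor of
# ConfusionMatrixDisplay.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


def eval_handler(context):
    # as_frame=True so xtest exposes .columns for the feature-importance table
    X, y = load_breast_cancer(return_X_y=True, as_frame=True)
    xtrain, xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
    model = RandomForestClassifier(n_estimators=100).fit(xtrain, ytrain)
    # logs accuracy/test-error results plus calibration, confusion-matrix,
    # ROC and PR artifacts, and returns the extra_data dict
    return eval_model_v2(context, xtest, ytest, model)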
from scikitplot.metrics import plot_calibration_curve
# imports implied by the snippet (not shown in the original)
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# X_train, y_train, X_test and y_test are assumed to come from earlier steps
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train, y_train, stratify=y_train, random_state=0)

rf = RandomForestClassifier(n_estimators=100)
lr = LogisticRegression()  # `lr` was not defined in the original snippet
rf_probas = rf.fit(X_train_sub, y_train_sub).predict_proba(X_test)
lr_probas = lr.fit(X_train_sub, y_train_sub).predict_proba(X_test)

probas_list = [rf_probas, lr_probas]
clf_names = ['Random Forest', 'Logistic Regression']
plot_calibration_curve(y_test, probas_list, clf_names, n_bins=4)

######## Sigmoid + Isotonic Regression #######
# specifying `cv='prefit'` says to use the prefit `rf` model from before
cal_rf = CalibratedClassifierCV(rf, cv="prefit", method='sigmoid')
cal_rf.fit(X_val, y_val)
scores_sigm = cal_rf.predict_proba(X_test)

cal_rf_iso = CalibratedClassifierCV(rf, cv="prefit", method='isotonic')
cal_rf_iso.fit(X_val, y_val)
scores_iso = cal_rf_iso.predict_proba(X_test)

probas_list = [rf_probas, lr_probas, scores_sigm, scores_iso]
clf_names = ['Random Forest', 'Logistic Regression', 'Sigmoid', 'Isotonic']
# re-plot with the calibrated scores included
plot_calibration_curve(y_test, probas_list, clf_names, n_bins=4)
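# To quantify what the calibration plot shows, it can help to compare Brier
# scores for the raw and calibrated probabilities. This follow-up is an
# illustrative addition; `y_test` and the probability arrays come from the
# snippet above. Lower is better, so the sigmoid/isotonic entries should
# typically score below the uncalibrated random forest.
from sklearn.metrics import brier_score_loss

for name, probas in zip(clf_names, probas_list):
    print(f"{name}: Brier score = {brier_score_loss(y_test, probas[:, 1]):.4f}")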
def eval_class_model(context, xtest, ytest, model, plots_dest: str = "plots",
                     pred_params: dict = None):
    """generate predictions and validation stats

    pred_params are non-default, scikit-learn api prediction-function
    parameters. For example, a tree-type model may have a tree depth
    limit for its prediction function.

    :param xtest:       features array type Union(DataItem, DataFrame,
                        numpy array)
    :param ytest:       ground-truth labels Union(DataItem, DataFrame,
                        Series, numpy array, List)
    :param model:       estimated model
    :param pred_params: (None) dict of predict function parameters
    """
    # avoid a mutable default argument
    if pred_params is None:
        pred_params = {}

    if isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    elif isinstance(ytest, list):
        unique_labels = set(ytest)
    else:
        try:
            ytest = ytest.values
            unique_labels = np.unique(ytest)
        except Exception:
            raise Exception("unrecognized data type for ytest")

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # INIT DICT...OR SOME OTHER COLLECTOR THAT CAN BE ACCESSED
    mm_plots = []
    mm_tables = []
    mm = {}

    ypred = model.predict(xtest, **pred_params)
    mm.update({
        "test-accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error": np.sum(ytest != ypred) / ytest.shape[0],
    })

    # GEN PROBS (INCL CALIBRATED PROBABILITIES)
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
    else:
        # todo: if decision fn...
        raise Exception("not implemented for this classifier")

    plot_calibration_curve(ytest, [yprob], ['xgboost'])
    context.log_artifact(
        PlotArtifact("calibration curve", body=plt.gcf()),
        local_path=f"{plots_dest}/calibration curve.html")

    # start evaluating:
    # mm_plots.extend(learning_curves(model))
    if hasattr(model, "evals_result"):
        results = model.evals_result()
        train_set = list(results.items())[0]
        valid_set = list(results.items())[1]
        learning_curves = pd.DataFrame({
            "train_error": train_set[1]["error"],
            "train_auc": train_set[1]["auc"],
            "valid_error": valid_set[1]["error"],
            "valid_auc": valid_set[1]["auc"],
        })

        plt.clf()  # gcf_clear(plt)
        fig, ax = plt.subplots()
        plt.xlabel('# training examples')
        plt.ylabel('auc')
        plt.title('learning curve - auc')
        ax.plot(learning_curves.train_auc, label='train')
        ax.plot(learning_curves.valid_auc, label='valid')
        legend = ax.legend(loc='lower left')
        context.log_artifact(
            PlotArtifact("learning curve - auc", body=plt.gcf()),
            local_path=f"{plots_dest}/learning curve - auc.html")

        plt.clf()  # gcf_clear(plt)
        fig, ax = plt.subplots()
        plt.xlabel('# training examples')
        plt.ylabel('error rate')
        plt.title('learning curve - error')
        ax.plot(learning_curves.train_error, label='train')
        ax.plot(learning_curves.valid_error, label='valid')
        legend = ax.legend(loc='lower left')
        context.log_artifact(
            PlotArtifact("learning curve - error", body=plt.gcf()),
            local_path=f"{plots_dest}/learning curve - error.html")

    (fi_plot, fi_tbl) = feature_importances(model, xtest.columns)
    mm_plots.append(fi_plot)
    mm_tables.append(fi_tbl)

    mm_plots.append(confusion_matrix(model, xtest, ytest))

    if is_multiclass:
        lb = LabelBinarizer()
        ytest_b = lb.fit_transform(ytest)

        mm_plots.append(precision_recall_multi(ytest_b, yprob, unique_labels))
        mm_plots.append(roc_multi(ytest_b, yprob, unique_labels))

        # AUC multiclass
        mm.update({
            "auc-micro": metrics.roc_auc_score(ytest_b, yprob,
                                               multi_class="ovo",
                                               average="micro"),
            "auc-weighted": metrics.roc_auc_score(ytest_b, yprob,
                                                  multi_class="ovo",
                                                  average="weighted"),
        })

        # others (todo - macro, micro...)
        mm.update({
            "f1-score": metrics.f1_score(ytest, ypred, average="micro"),
            "precision_score": metrics.precision_score(ytest, ypred,
                                                       average="micro"),
            "recall_score": metrics.recall_score(ytest, ypred,
                                                 average="micro"),
        })
    else:
        # extract the positive label
        yprob_pos = yprob[:, 1]

        mm_plots.append(roc_bin(ytest, yprob_pos))
        mm_plots.append(precision_recall_bin(model, xtest, ytest, yprob_pos))

        mm.update({
            "rocauc": metrics.roc_auc_score(ytest, yprob_pos),
            "brier_score": metrics.brier_score_loss(ytest, yprob_pos,
                                                    pos_label=ytest.max()),
            "f1-score": metrics.f1_score(ytest, ypred),
            "precision_score": metrics.precision_score(ytest, ypred),
            "recall_score": metrics.recall_score(ytest, ypred),
        })

    # return all model metrics and plots
    mm.update({"plots": mm_plots, "tables": mm_tables})
    return mm