def make_plots(y_test, y_pred, y_prob, algorithm, timestamp):
    """Render and save the evaluation plots for one classifier run.

    A normalized confusion matrix is always written. When ``y_prob`` is
    given, a precision-recall curve and a ROC curve are written as well.
    Every image is stored as
    ``static/img/<kind>/<algorithm>-<timestamp>.png`` where ``<kind>`` is
    ``cm``, ``precrec`` or ``roc``.

    Args:
        y_test: Labels passed as the first argument to every plot function.
        y_pred: Predictions passed to the confusion-matrix plot.
        y_prob: Probabilities passed to the precision-recall and ROC plots,
            or ``None`` to skip those two plots.
        algorithm: Key looked up in ``classifier_names`` for the plot title;
            also embedded in the file name.
        timestamp: Value embedded in the file name.
    """
    def _save_and_close(kind):
        # The parameter is called ``kind`` (not ``type``) so that the
        # builtin ``type`` is not shadowed inside the helper.
        plt.savefig('static/img/{}/{}-{}.png'.format(kind, algorithm, timestamp), dpi=200)
        # Close every open figure so consecutive plots do not pile up.
        plt.close('all')

    size = (20, 20)
    name = classifier_names[algorithm]

    plot_confusion_matrix(y_test, y_pred, normalize=True, figsize=size,
                          title_fontsize=40, text_fontsize=30, title=name)
    _save_and_close('cm')

    if y_prob is not None:
        plot_precision_recall_curve(y_test, y_prob, figsize=size,
                                    title_fontsize=40, text_fontsize=25, title=name)
        _save_and_close('precrec')

        plot_roc_curve(y_test, y_prob, figsize=size,
                       title_fontsize=40, text_fontsize=25, title=name)
        _save_and_close('roc')
def _plotRocCurve50_50(self, key, values, ax):
    """Plot the ROC curve of every result whose train weight is 0.5.

    Args:
        key: Used as the plot title.
        values: Iterable of result objects exposing ``weight_train``,
            ``y_test`` and ``probas``.
        ax: Axes to draw on.

    Returns:
        The same ``ax`` that was passed in.
    """
    even_split_results = [result for result in values if result.weight_train == 0.5]
    for result in even_split_results:
        skplt.plot_roc_curve(y_true=result.y_test, y_probas=result.probas,
                             ax=ax, title=key)

    # Axis styling is applied once, after all curves have been drawn.
    ax.legend(loc='lower right')
    ax.set_ylabel('Score')
    ax.set_xlabel('%Train')
    ax.grid(True)
    ax.tick_params(labelsize="medium")
    return ax
def draw_roc():
    """Plot a ROC curve from the labels and scores stored in two CSV files.

    ``predictions.csv`` supplies the labels: a first column equal to ``'0'``
    is mapped to ``'benign'``, anything else to ``'malignant'``.
    ``final_predictions.csv`` supplies one score pair per row; the two
    columns are swapped so that each entry is ``[column 1, column 0]``.

    Blank rows in either file are skipped. The collected labels and scores
    are printed before the curve is shown.
    """
    y_label = []
    # ``newline=''`` is what the csv module expects so that it can handle
    # line endings (and newlines embedded in quoted fields) itself.
    with open('predictions.csv', 'rt', newline='') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for an empty line; row[0] would fail.
                continue
            y_label.append('benign' if row[0] == '0' else 'malignant')

    y_prediction = []
    with open('final_predictions.csv', 'rt', newline='') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                continue
            y_prediction.append([float(row[1]), float(row[0])])

    print(y_label)
    print(y_prediction)
    skplt.plot_roc_curve(y_label, y_prediction)
    plt.show()
def plotRocCurve(self, classifier_name, classifierResults):
    """Draw one ROC curve per result on a 3x3 grid and save it as a PDF.

    The figure is written to ``plots/roc_curve_<classifier_name>.pdf``.
    The grid has nine cells, so a tenth result raises ``IndexError``.

    Args:
        classifier_name: Used as the window title and in the file name.
        classifierResults: Iterable of result objects exposing ``y_test``,
            ``probas``, ``weight_train`` and ``weight_test``.
    """
    fig, axes = plt.subplots(3, 3, figsize=(15, 15))

    # ``fig.canvas.set_window_title`` is deprecated/removed in newer
    # Matplotlib releases; the canvas manager provides the same method.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(classifier_name)

    # Row-major flattening visits the cells as (0, 0), (0, 1), ..., (2, 2).
    cells = axes.ravel()
    for i, classifierResult in enumerate(classifierResults):
        cell = cells[i]
        skplt.plot_roc_curve(y_true=classifierResult.y_test,
                             y_probas=classifierResult.probas,
                             ax=cell)
        # Make this cell the current axes instance.
        plt.sca(cell)
        cell.set_xlabel("Training/Test ({}/{})".format(
            round(classifierResult.weight_train * 100, 0),
            round(classifierResult.weight_test * 100, 0)))
        # Hide the tick marks on both axes.
        cell.get_xaxis().set_ticks([])
        cell.get_yaxis().set_ticks([])

    fig.subplots_adjust(hspace=0.3)
    plt.tight_layout()
    plt.savefig("plots/roc_curve_{}.pdf".format(classifier_name))
def test_array_like(self):
    """Plain nested lists are accepted for both labels and probabilities."""
    labels = [0, 1]
    probabilities = [[0.8, 0.2], [0.2, 0.8]]
    # The test passes as long as the call does not raise.
    skplt.plot_roc_curve(labels, probabilities)
def plot_roc_curve(clf, X, y, title='ROC Curves', do_cv=True, cv=None,
                   shuffle=True, random_state=None, ax=None):
    """Generates the ROC curves for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements "fit" and "predict_proba"
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to
            "ROC Curves".

        do_cv (bool, optional): If True, a clone of the classifier is
            cross-validated on the dataset using the strategy in `cv`, and
            the ROC curves are generated from the out-of-fold
            probabilities. If False, the ROC curves are generated from
            ``clf.predict_proba(X)`` without any training. This assumes
            that the classifier has already been called with its `fit`
            method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with its default
                number of folds,
              - integer, to use :class:`StratifiedKFold` with that many
                folds,
              - An object with a ``split(X, y)`` method.
              - An iterable yielding (train, test) index pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv`
            is None or an integer. Determines whether to shuffle the data
            before splitting. Default set to True.

        random_state (int :class:`RandomState`): Pseudo-random number
            generator state used for shuffling. Ignored when `shuffle` is
            False.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which
            to plot the ROC curves. If None, the plot is drawn on a new set
            of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Raises:
        TypeError: If `clf` has no "predict_proba" method.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_roc_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_roc_curve.png
           :align: center
           :alt: ROC Curves
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. Cannot calculate ROC Curve.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y
    else:
        # Plain sequences (lists, tuples) cannot be indexed with the index
        # arrays produced by the splitter. Objects that already expose
        # ``shape`` (ndarrays, sparse matrices, ...) are left untouched.
        if not hasattr(X, 'shape'):
            X = np.asarray(X)

        # Recent scikit-learn releases reject a random_state that is set
        # while shuffle is False, so only forward it when it has an effect.
        seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=seed)

        # A splitter object provides ``split``; anything else is treated as
        # an iterable that already yields (train, test) index pairs.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        clf_clone = clone(clf)
        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds_list.append(clf_clone.predict_proba(X_test))
            trues_list.append(y_test)
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    # Compute ROC curve and ROC area for each class
    ax = plotters.plot_roc_curve(y_true=y_true, y_probas=probas, title=title, ax=ax)

    return ax
"""An example showing the plot_roc_curve method used by a scikit-learn classifier"""
from __future__ import absolute_import

import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from scikitplot import plotters as skplt
from sklearn.datasets import load_digits as load_data
from sklearn.naive_bayes import GaussianNB

X, y = load_data(return_X_y=True)

# Factory API: the wrapped classifier gains a plot_roc_curve method.
nb = classifier_factory(GaussianNB())
nb.plot_roc_curve(X, y, random_state=1)
plt.show()

# Functions API: fit the classifier yourself and hand the probabilities
# to the plotting function.
nb = GaussianNB().fit(X, y)
probas = nb.predict_proba(X)
skplt.plot_roc_curve(y_true=y, y_probas=probas)
plt.show()
def plot_roc_curve_with_cv(clf, X, y, title='ROC Curves', do_cv=True,
                           cv=None, shuffle=True, random_state=None,
                           curves=('micro', 'macro', 'each_class'),
                           ax=None, figsize=None, cmap='nipy_spectral',
                           title_fontsize="large", text_fontsize="medium"):
    """Generates the ROC curves for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and
            ``predict_proba`` methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to
            "ROC Curves".

        do_cv (bool, optional): If True, a clone of the classifier is
            cross-validated on the dataset using the strategy in `cv`, and
            the ROC curves are generated from the out-of-fold
            probabilities. If False, the ROC curves are generated from
            ``clf.predict_proba(X)`` without any training. This assumes
            that the classifier has already been called with its `fit`
            method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with its default
                number of folds,
              - integer, to use :class:`StratifiedKFold` with that many
                folds,
              - An object with a ``split(X, y)`` method.
              - An iterable yielding (train, test) index pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv`
            is None or an integer. Determines whether to shuffle the data
            before splitting. Default set to True.

        random_state (int :class:`RandomState`): Pseudo-random number
            generator state used for shuffling. Ignored when `shuffle` is
            False.

        curves (array-like): A listing of which curves should be plotted on
            the resulting plot. Defaults to
            `("micro", "macro", "each_class")`
            i.e. "micro" for micro-averaged curve, "macro" for
            macro-averaged curve. The value is forwarded unchanged to
            ``plotters.plot_roc_curve``.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which
            to plot the ROC curves. If None, the plot is drawn on a new set
            of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance,
            optional): Colormap used for plotting the projection. View
            Matplotlib Colormap documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style
            fontsizes. Use e.g. "small", "medium", "large" or
            integer-values. Defaults to "large".

        text_fontsize (string or int, optional): Matplotlib-style
            fontsizes. Use e.g. "small", "medium", "large" or
            integer-values. Defaults to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Raises:
        TypeError: If `clf` has no "predict_proba" method.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_roc_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_roc_curve.png
           :align: center
           :alt: ROC Curves
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate ROC Curve.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y
    else:
        # Plain sequences (lists, tuples) cannot be indexed with the index
        # arrays produced by the splitter. Objects that already expose
        # ``shape`` (ndarrays, sparse matrices, ...) are left untouched.
        if not hasattr(X, 'shape'):
            X = np.asarray(X)

        # Recent scikit-learn releases reject a random_state that is set
        # while shuffle is False, so only forward it when it has an effect.
        seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=seed)

        # A splitter object provides ``split``; anything else is treated as
        # an iterable that already yields (train, test) index pairs.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        clf_clone = clone(clf)
        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds_list.append(clf_clone.predict_proba(X_test))
            trues_list.append(y_test)
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    # Compute ROC curve and ROC area for each class
    ax = plotters.plot_roc_curve(y_true=y_true, y_probas=probas, title=title,
                                 curves=curves, ax=ax, figsize=figsize,
                                 cmap=cmap, title_fontsize=title_fontsize,
                                 text_fontsize=text_fontsize)

    return ax
def plot_roc_auc(self):
    """Show the ROC curve for ``self.y_test`` / ``self.y_prob``.

    Nothing is drawn when ``self.n_classes`` is greater than two; the plot
    is only produced for binary classification.
    """
    if self.n_classes > 2:
        return
    skplt.plot_roc_curve(self.y_test, self.y_prob)
    plt.show()
def report_and_roc_plot(data_x, data_y, model):
    """Report the model's predictions and show its ROC curve.

    Args:
        data_x: Features passed to ``model.predict`` and
            ``model.predict_proba``.
        data_y: True targets. For the ROC plot every element equal to 1
            becomes the string ``'1'`` and every other element ``'0'``.
        model: Object providing ``predict`` and ``predict_proba``.
    """
    def _as_label(value):
        return '1' if value == 1 else '0'

    predicted_report(data_y, model.predict(data_x))

    string_labels = np.vectorize(_as_label)(data_y)
    probabilities = model.predict_proba(data_x)
    skplt.plot_roc_curve(string_labels, probabilities)
    plt.show()
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from scikitplot import plotters as skplt

# Hold out 30% of the rows, once for the raw frame and once for X/y.
train, val = train_test_split(data, test_size=0.3)
X_train, X_val, y_train, y_val = train_test_split(
    X_all, y_all, test_size=0.3)

# Cross-validated scores; the second assignment replaces the first, so
# ``score`` ends up holding the mean accuracy.
cv = StratifiedKFold(n_splits=3, shuffle=True)
score = cross_val_score(
    LogisticRegression(), X_all, y_all,
    scoring='neg_mean_squared_error', cv=cv,
).mean()
score = cross_val_score(
    LogisticRegression(), X_all, y_all,
    scoring='accuracy', cv=cv,
).mean()

#### Learning Curve
skplt.plot_learning_curve(LogisticRegression(), X_all, y_all)
plt.show()

skplt.plot_roc_curve(y_true=y_val, y_probas=y_proba)
plt.show()

skplt.plot_precision_recall_curve(y_true=y_val, y_probas=y_proba)
plt.show()

skplt.plot_confusion_matrix(y_true=y_val, y_pred=y_pred, normalize=True)
plt.show()

#### XGBoost
from xgboost import XGBRegressor
import xgboost as xgb

params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
}
dtrain = xgb.DMatrix(X_all, label=y_all)
import matplotlib.pyplot as plt
import scikitplot.plotters as skplt

# Synthetic binary problem: the first 9000 rows train, the rest test.
X, y = make_classification(n_samples=10000, n_features=10, n_classes=2,
                           n_informative=5)
Xtrain, Xtest = X[:9000], X[9000:]
ytrain, ytest = y[:9000], y[9000:]

clf = LogisticRegression().fit(Xtrain, ytrain)

# The full probability matrix is passed on, not only the column at
# index 1 (``predict_proba(Xtest)[:, 1]``).
preds = clf.predict_proba(Xtest)
skplt.plot_roc_curve(ytest, preds)
plt.show()