Example #1
# Imports assumed by this snippet; classifier_names is a project-level dict
# mapping an algorithm key to a display name.
import matplotlib.pyplot as plt
from scikitplot.plotters import (plot_confusion_matrix,
                                 plot_precision_recall_curve, plot_roc_curve)


def make_plots(y_test, y_pred, y_prob, algorithm, timestamp):
    def _save_and_close(plot_type):
        plt.savefig('static/img/{}/{}-{}.png'.format(plot_type, algorithm,
                                                     timestamp),
                    dpi=200)
        plt.close('all')

    size = (20, 20)
    name = classifier_names[algorithm]

    plot_confusion_matrix(y_test,
                          y_pred,
                          normalize=True,
                          figsize=size,
                          title_fontsize=40,
                          text_fontsize=30,
                          title=name)
    _save_and_close('cm')

    if y_prob is not None:
        plot_precision_recall_curve(y_test,
                                    y_prob,
                                    figsize=size,
                                    title_fontsize=40,
                                    text_fontsize=25,
                                    title=name)
        _save_and_close('precrec')

        plot_roc_curve(y_test,
                       y_prob,
                       figsize=size,
                       title_fontsize=40,
                       text_fontsize=25,
                       title=name)
        _save_and_close('roc')
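
A minimal driver sketch for the function above, assuming the static/img/cm, static/img/precrec, and static/img/roc directories exist and that classifier_names has a 'gnb' entry; the dataset, split, and timestamp format are illustrative.

# Hypothetical driver; dataset, algorithm key, and timestamp format are placeholders.
from datetime import datetime

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                     random_state=0)
clf = GaussianNB().fit(X_train, y_train)

make_plots(y_test,
           clf.predict(X_test),
           clf.predict_proba(X_test),   # per-class probabilities for the ROC/PR plots
           algorithm='gnb',             # assumes classifier_names contains this key
           timestamp=datetime.now().strftime('%Y%m%d-%H%M%S'))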
Example #2
    def _plotRocCurve50_50(self, key, values, ax):
        for i, clfr in enumerate(values):
            if clfr.weight_train == 0.5:
                skplt.plot_roc_curve(y_true=clfr.y_test,
                                     y_probas=clfr.probas,
                                     ax=ax,
                                     title=key)

        ax.legend(loc='lower right')
        ax.set_ylabel('Score')
        ax.set_xlabel('%Train')
        ax.grid(True)
        ax.tick_params(labelsize="medium")

        return ax
Example #3
# Imports assumed by this snippet.
import csv

import matplotlib.pyplot as plt
import scikitplot.plotters as skplt


def draw_roc():
    y_label = []
    y_prediction = []
    with open('predictions.csv', 'rt') as f:
        data = csv.reader(f, delimiter=',')
        for d in data:
            if d[0] == '0':
                y_label.append('benign')
            else:
                y_label.append('malignant')
            # y_label.append(int(d[0]))

    with open('final_predictions.csv', 'rt') as f:
        data = csv.reader(f, delimiter=',')
        for d in data:
            y_prediction.append([float(d[1]), float(d[0])])

    print(y_label)
    print(y_prediction)

    skplt.plot_roc_curve(y_label, y_prediction)
    plt.show()
Example #4
    def plotRocCurve(self, classifier_name, classifierResults):
        fig, axes = plt.subplots(3, 3, figsize=(15, 15))
        fig.canvas.set_window_title(classifier_name)

        indexes = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0),
                   (2, 1), (2, 2)]

        for i, classifierResult in enumerate(classifierResults):

            skplt.plot_roc_curve(y_true=classifierResult.y_test,
                                 y_probas=classifierResult.probas,
                                 ax=axes[indexes[i]])

            # set the current axes instance
            plt.sca(axes[indexes[i]])
            axes[indexes[i]].set_xlabel("Training/Test ({}/{})".format(
                round(classifierResult.weight_train * 100, 0),
                round(classifierResult.weight_test * 100, 0)))  # set x label
            axes[indexes[i]].get_xaxis().set_ticks([])  # hide x-axis ticks
            axes[indexes[i]].get_yaxis().set_ticks([])

        fig.subplots_adjust(hspace=0.3)
        plt.tight_layout()
        plt.savefig("plots/roc_curve_{}.pdf".format(classifier_name))
Example #5
 def test_array_like(self):
     ax = skplt.plot_roc_curve([0, 1], [[0.8, 0.2], [0.2, 0.8]])
Example #6
 def test_array_like(self):
     ax = skplt.plot_roc_curve([0, 1], [[0.8, 0.2], [0.2, 0.8]])
Example #7
def plot_roc_curve(clf, X, y, title='ROC Curves', do_cv=True, cv=None,
                   shuffle=True, random_state=None, ax=None):
    """Generates the ROC curves for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements "fit" and "predict_proba" methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to "ROC Curves".

        do_cv (bool, optional): If True, the classifier is cross-validated on the dataset using the
            cross-validation strategy in `cv` to generate the ROC curves. If False, the
            ROC curves are generated without training or cross-validating the classifier.
            This assumes that the classifier has already been fitted beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines the
            cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines whether to shuffle the
            training data before splitting using cross-validation. Default set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random number generator state used
            for random sampling.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the ROC curves. If None, the plot is drawn on a new set of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_roc_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_roc_curve.png
           :align: center
           :alt: ROC Curves
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. Cannot calculate ROC Curve.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict_proba(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    # Compute ROC curve and ROC area for each class
    ax = plotters.plot_roc_curve(y_true=y_true, y_probas=probas, title=title, ax=ax)
    return ax
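
A hedged usage sketch for the function above with do_cv=False, i.e. plotting ROC curves for a classifier that is already fitted; the estimator and dataset are illustrative, and the function's module-level dependencies (np, plotters, StratifiedKFold, clone) are assumed to be in scope.

# Illustrative only: pre-fit a classifier, then plot ROC curves without cross-validation.
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(return_X_y=True)
nb = GaussianNB().fit(X, y)

plot_roc_curve(nb, X, y, do_cv=False, title='ROC Curves (pre-fitted)')
plt.show()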
Example #8
"""An example showing the plot_roc_curve method used by a scikit-learn classifier"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits as load_data


X, y = load_data(return_X_y=True)
nb = classifier_factory(GaussianNB())
nb.plot_roc_curve(X, y, random_state=1)
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
nb = GaussianNB()
nb = nb.fit(X, y)
probas = nb.predict_proba(X)
skplt.plot_roc_curve(y_true=y, y_probas=probas)
plt.show()
Example #9
def plot_roc_curve_with_cv(clf,
                           X,
                           y,
                           title='ROC Curves',
                           do_cv=True,
                           cv=None,
                           shuffle=True,
                           random_state=None,
                           curves=('micro', 'macro', 'each_class'),
                           ax=None,
                           figsize=None,
                           cmap='nipy_spectral',
                           title_fontsize="large",
                           text_fontsize="medium"):
    """Generates the ROC curves for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict_proba``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to
            "ROC Curves".

        do_cv (bool, optional): If True, the classifier is cross-validated on
            the dataset using the cross-validation strategy in `cv` to generate
            the ROC curves. If False, the ROC curves are generated without
            training or cross-validating the classifier. This assumes that the
            classifier has already been fitted beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is
            used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines
            whether to shuffle the training data before splitting using
            cross-validation. Default set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random number generator
            state used for random sampling.

        curves (array-like): A listing of which curves should be plotted on the
            resulting plot. Defaults to `("micro", "macro", "each_class")`,
            i.e. "micro" for the micro-averaged curve, "macro" for the
            macro-averaged curve, and "each_class" for one curve per class.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the ROC curves. If None, the plot is drawn on a new set of
            axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
            Colormap used for plotting the projection. View Matplotlib Colormap
            documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_roc_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_roc_curve.png
           :align: center
           :alt: ROC Curves
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate ROC Curve.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv,
                                 shuffle=shuffle,
                                 random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict_proba(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    # Compute ROC curve and ROC area for each class
    ax = plotters.plot_roc_curve(y_true=y_true,
                                 y_probas=probas,
                                 title=title,
                                 curves=curves,
                                 ax=ax,
                                 figsize=figsize,
                                 cmap=cmap,
                                 title_fontsize=title_fontsize,
                                 text_fontsize=text_fontsize)

    return ax
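
A sketch of how the cv and curves arguments might be combined, passing an explicit StratifiedKFold and requesting only the averaged curves; the estimator and dataset are placeholders.

# Illustrative call: explicit CV strategy, micro- and macro-averaged curves only.
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

X, y = load_breast_cancer(return_X_y=True)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

plot_roc_curve_with_cv(LogisticRegression(), X, y, cv=cv,
                       curves=('micro', 'macro'), figsize=(8, 8))
plt.show()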
Example #10
 def plot_roc_auc(self):
     # only for binary classification
     if self.n_classes <= 2:
         skplt.plot_roc_curve(self.y_test, self.y_prob)
         plt.show()
Example #11
def report_and_roc_plot(data_x, data_y, model):
    results_to_vals = np.vectorize(lambda x: '1' if x == 1 else '0')

    predicted_report(data_y, model.predict(data_x))
    skplt.plot_roc_curve(results_to_vals(data_y), model.predict_proba(data_x))
    plt.show()
Example #12
# Imports assumed by this snippet; `data`, `X_all`, and `y_all` come from
# earlier preprocessing that is not shown here.
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

train, val = train_test_split(data, test_size=0.3)
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size = 0.3)

from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=3, shuffle=True)

from sklearn.model_selection import cross_val_score
score = cross_val_score(LogisticRegression(), X_all, y_all, scoring='neg_mean_squared_error', cv=cv).mean()
score = cross_val_score(LogisticRegression(), X_all, y_all, scoring='accuracy', cv=cv).mean()

#### Learning Curve

from scikitplot import plotters as skplt
skplt.plot_learning_curve(LogisticRegression(), X_all, y_all)
plt.show()
# Fit on the training split so the plots below have predictions to work with.
clf = LogisticRegression().fit(X_train, y_train)
y_pred = clf.predict(X_val)
y_proba = clf.predict_proba(X_val)

skplt.plot_roc_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_precision_recall_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_confusion_matrix(y_true=y_val, y_pred=y_pred, normalize=True)
plt.show()

#### XGBoost

from xgboost import XGBRegressor
import xgboost as xgb
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
}
dtrain = xgb.DMatrix(X_all, label=y_all)
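
The snippet stops after building params and dtrain; a hedged continuation using xgboost's native API could cross-validate and train a booster roughly like this (round counts and the early-stopping setting are arbitrary).

# Illustrative continuation: cross-validate, then train a booster on the DMatrix above.
cv_results = xgb.cv(params, dtrain, num_boost_round=100, nfold=3,
                    stratified=True, early_stopping_rounds=10, seed=1)
print(cv_results['test-logloss-mean'].iloc[-1])

bst = xgb.train(params, dtrain, num_boost_round=len(cv_results))
y_proba_xgb = bst.predict(xgb.DMatrix(X_val))  # P(class 1) under binary:logistic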
Example #13
def plot_roc_curve_with_cv(clf, X, y, title='ROC Curves', do_cv=True,
                           cv=None, shuffle=True, random_state=None,
                           curves=('micro', 'macro', 'each_class'),
                           ax=None, figsize=None, cmap='nipy_spectral',
                           title_fontsize="large", text_fontsize="medium"):
    """Generates the ROC curves for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict_proba``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to
            "ROC Curves".

        do_cv (bool, optional): If True, the classifier is cross-validated on
            the dataset using the cross-validation strategy in `cv` to generate
            the ROC curves. If False, the ROC curves are generated without
            training or cross-validating the classifier. This assumes that the
            classifier has already been fitted beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is
            used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines
            whether to shuffle the training data before splitting using
            cross-validation. Default set to True.

        random_state (int or :class:`RandomState`, optional): Pseudo-random number generator
            state used for random sampling.

        curves (array-like): A listing of which curves should be plotted on the
            resulting plot. Defaults to `("micro", "macro", "each_class")`,
            i.e. "micro" for the micro-averaged curve, "macro" for the
            macro-averaged curve, and "each_class" for one curve per class.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the ROC curves. If None, the plot is drawn on a new set of
            axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
            Colormap used for plotting the projection. View Matplotlib Colormap
            documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_roc_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_roc_curve.png
           :align: center
           :alt: ROC Curves
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate ROC Curve.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict_proba(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    # Compute ROC curve and ROC area for each class
    ax = plotters.plot_roc_curve(y_true=y_true, y_probas=probas, title=title,
                                 curves=curves, ax=ax, figsize=figsize,
                                 cmap=cmap, title_fontsize=title_fontsize,
                                 text_fontsize=text_fontsize)

    return ax
Example #14
import scikitplot.plotters as skplt
import matplotlib.pyplot as plt
# Additional imports assumed by this snippet.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# preds = clf.predict_proba(Xtest)
# skplt.plot_roc_curve(ytest, preds)
# plt.show()

X, y = make_classification(n_samples=10000,
                           n_features=10,
                           n_classes=2,
                           n_informative=5)
Xtrain = X[:9000]
Xtest = X[9000:]
ytrain = y[:9000]
ytest = y[9000:]

clf = LogisticRegression()
clf.fit(Xtrain, ytrain)

# preds = clf.predict_proba(Xtest)[:,1]
preds = clf.predict_proba(Xtest)

skplt.plot_roc_curve(ytest, preds)
plt.show()

# fpr, tpr, _ = metrics.roc_curve(ytest, preds)

# df = pd.DataFrame(dict(fpr=fpr, tpr=tpr))
# ggplot(df, aes(x='fpr', y='tpr')) +\
#     geom_line() +\
#     geom_abline(linetype='dashed', slope=1,intercept=0)
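
For reference, the commented-out tail could be reproduced with plain matplotlib instead of ggplot; a minimal sketch, assuming the positive-class scores are preds[:, 1].

# Illustrative manual ROC plot with sklearn.metrics + matplotlib.
from sklearn import metrics

fpr, tpr, _ = metrics.roc_curve(ytest, preds[:, 1])
plt.plot(fpr, tpr, label='ROC (AUC = {:.3f})'.format(metrics.auc(fpr, tpr)))
plt.plot([0, 1], [0, 1], linestyle='--')  # chance line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()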