Ejemplo n.º 1
0
 def test_array_like(self):
     """Call plot_ks_statistic with plain nested lists instead of arrays.

     There is no explicit assertion; the test passes when the call
     completes without raising.
     """
     labels = [0, 1]
     class_probabilities = [[0.8, 0.2], [0.2, 0.8]]
     ax = skplt.plot_ks_statistic(labels, class_probabilities)
Ejemplo n.º 2
0
"""An example showing the plot_ks_statistic method used by a scikit-learn classifier"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer as load_data

# --- Usage 1: plotting method attached to the classifier -------------------
# return_X_y=True makes the loader hand back a (features, labels) pair.
X, y = load_data(return_X_y=True)
# The object returned by classifier_factory exposes plot_ks_statistic as a
# method (used on the next line).
lr = classifier_factory(LogisticRegression())
# The classifier is passed in unfitted here; presumably the method fits /
# cross-validates it internally -- confirm against the scikitplot docs.
lr.plot_ks_statistic(X, y, random_state=1)
plt.show()

# --- Usage 2: the more flexible functions API ------------------------------
# Using the more flexible functions API
from scikitplot import plotters as skplt
lr = LogisticRegression()
# Fit on the full dataset and score that same data, so the second plot is
# built from probabilities predicted on the training samples.
lr = lr.fit(X, y)
probas = lr.predict_proba(X)
skplt.plot_ks_statistic(y_true=y, y_probas=probas)
plt.show()
Ejemplo n.º 3
0
 def test_array_like(self):
     """Call plot_ks_statistic with plain nested lists instead of arrays.

     There is no explicit assertion; the test passes when the call
     completes without raising.
     """
     labels = [0, 1]
     class_probabilities = [[0.8, 0.2], [0.2, 0.8]]
     ax = skplt.plot_ks_statistic(labels, class_probabilities)
Ejemplo n.º 4
0
def plot_ks_statistic(clf, X, y, title='KS Statistic Plot', do_cv=True, cv=None,
                   shuffle=True, random_state=None, ax=None):
    """Generates the KS Statistic plot for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements "fit" and "predict_proba" methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. Lists/tuples are converted
            to arrays before cross-validation; objects exposing ``.iloc``
            (e.g. pandas DataFrames) are row-indexed positionally.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to "KS Statistic Plot".

        do_cv (bool, optional): If True, a clone of the classifier is cross-validated on the
            dataset using the cross-validation strategy in `cv`, and the out-of-fold
            probabilities are used to generate the KS Statistic plot. If False, the plot is
            generated from ``clf.predict_proba(X)`` without training or cross-validating.
            This assumes that the classifier has already been called with its `fit` method
            beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines the
            cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with the default number
                of folds of the installed scikit-learn version,
              - integer, to specify the number of :class:`StratifiedKFold` folds.
              - An object with a ``split(X, y)`` method, used as a
                cross-validation generator.
              - An iterable yielding (train_indices, test_indices) pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv` is None or an
            integer. Determines whether to shuffle the training data before splitting
            using cross-validation. Default set to True.

        random_state (int :class:`RandomState`): Pseudo-random number generator state used
            for random sampling. Only forwarded to the splitter when `shuffle` is True.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the KS Statistic plot. If None, the plot is drawn on a new set of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Raises:
        TypeError: If `clf` does not have a "predict_proba" method.

    Example:
            >>> lr = classifier_factory(LogisticRegression())
            >>> lr.plot_ks_statistic(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_ks_statistic.png
           :align: center
           :alt: KS Statistic
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate KS Statistic.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y

    else:
        # Plain Python sequences cannot be indexed with the integer index
        # arrays produced by the splitter, so promote them to ndarrays.
        if isinstance(X, (list, tuple)):
            X = np.array(X)
        # pandas objects interpret X[indices] as a label lookup; their .iloc
        # indexer gives the positional row selection needed here.
        rows = X.iloc if hasattr(X, 'iloc') else X

        # A seed is meaningless without shuffling, and recent scikit-learn
        # raises if random_state is set while shuffle=False.
        fold_seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=fold_seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=fold_seed)

        # Accept both splitter objects and ready-made iterables of
        # (train_indices, test_indices) pairs, as documented above.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        # Work on a clone so the caller's estimator is never refitted.
        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            clf_clone.fit(rows[train_index], y[train_index])
            preds_list.append(clf_clone.predict_proba(rows[test_index]))
            trues_list.append(y[test_index])
        # Stitch the out-of-fold predictions together; labels are collected in
        # the same order so the two stay aligned.
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_ks_statistic(y_true, probas, title=title, ax=ax)

    return ax
Ejemplo n.º 5
0
 def plot_ks_statistic(self):
     """Plot the KS statistic from ``self.y_test`` and ``self.y_prob`` and show it.

     NOTE: the original author flagged this method as "not working"; the
     cause cannot be determined from this method alone (presumably the
     shape of ``self.y_prob`` -- confirm against the plotting function).
     """
     y_true, y_probas = self.y_test, self.y_prob
     skplt.plot_ks_statistic(y_true, y_probas)
     plt.show()
Ejemplo n.º 6
0
def plot_ks_statistic_with_cv(clf, X, y, title='KS Statistic Plot',
                              do_cv=True, cv=None, shuffle=True,
                              random_state=None, ax=None, figsize=None,
                              title_fontsize="large", text_fontsize="medium"):
    """Generates the KS Statistic plot for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements "fit" and "predict_proba"
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. Lists/tuples are converted
            to arrays before cross-validation; objects exposing ``.iloc``
            (e.g. pandas DataFrames) are row-indexed positionally.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to
            "KS Statistic Plot".

        do_cv (bool, optional): If True, a clone of the classifier is
            cross-validated on the dataset using the cross-validation
            strategy in `cv`, and the out-of-fold probabilities are used to
            generate the KS Statistic plot. If False, the plot is generated
            from ``clf.predict_proba(X)`` without training or
            cross-validating the classifier. This assumes that the
            classifier has already been called with its `fit` method
            beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use :class:`StratifiedKFold` with the default number
                of folds of the installed scikit-learn version,
              - integer, to specify the number of :class:`StratifiedKFold`
                folds.
              - An object with a ``split(X, y)`` method, used as a
                cross-validation generator.
              - An iterable yielding (train_indices, test_indices) pairs.

        shuffle (bool, optional): Used when do_cv is set to True and `cv` is
            None or an integer. Determines whether to shuffle the training
            data before splitting using cross-validation. Default set to
            True.

        random_state (int :class:`RandomState`): Pseudo-random number generator
            state used for random sampling. Only forwarded to the splitter
            when `shuffle` is True.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the KS Statistic plot. If None, the plot is drawn on a new
            set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Raises:
        TypeError: If `clf` does not have a "predict_proba" method.

    Example:
            >>> lr = classifier_factory(LogisticRegression())
            >>> lr.plot_ks_statistic(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_ks_statistic.png
           :align: center
           :alt: KS Statistic
    """
    y = np.array(y)

    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate KS Statistic.')

    if not do_cv:
        probas = clf.predict_proba(X)
        y_true = y

    else:
        # Plain Python sequences cannot be indexed with the integer index
        # arrays produced by the splitter, so promote them to ndarrays.
        if isinstance(X, (list, tuple)):
            X = np.array(X)
        # pandas objects interpret X[indices] as a label lookup; their .iloc
        # indexer gives the positional row selection needed here.
        rows = X.iloc if hasattr(X, 'iloc') else X

        # A seed is meaningless without shuffling, and recent scikit-learn
        # raises if random_state is set while shuffle=False.
        fold_seed = random_state if shuffle else None
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=fold_seed)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=fold_seed)

        # Accept both splitter objects and ready-made iterables of
        # (train_indices, test_indices) pairs, as documented above.
        splits = cv.split(X, y) if hasattr(cv, 'split') else cv

        # Work on a clone so the caller's estimator is never refitted.
        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in splits:
            clf_clone.fit(rows[train_index], y[train_index])
            preds_list.append(clf_clone.predict_proba(rows[test_index]))
            trues_list.append(y[test_index])
        # Stitch the out-of-fold predictions together; labels are collected in
        # the same order so the two stay aligned.
        probas = np.concatenate(preds_list, axis=0)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_ks_statistic(y_true, probas, title=title,
                                    ax=ax, figsize=figsize,
                                    title_fontsize=title_fontsize,
                                    text_fontsize=text_fontsize)

    return ax