Example #1
 def test_curve_diffs(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax_micro = clf.plot_precision_recall_curve(self.X, self.y, curves='micro')
     ax_class = clf.plot_precision_recall_curve(self.X, self.y, curves='each_class')
     self.assertNotEqual(ax_micro, ax_class)
Example #2
 def test_do_cv(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_precision_recall_curve(self.X, self.y)
     self.assertRaises(AttributeError, clf.plot_precision_recall_curve, self.X, self.y,
                       do_cv=False)
Example #4
 def test_train_sizes(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_learning_curve(self.X,
                                  self.y,
                                  train_sizes=np.linspace(0.1, 1.0, 8))
Example #6
 def test_order(self):
     np.random.seed(0)
     clf = RandomForestClassifier()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, self.y)
     ax = clf.plot_feature_importances(order='ascending')
     ax = clf.plot_feature_importances(order='descending')
     ax = clf.plot_feature_importances(order=None)
Example #7
 def test_cmap(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y, cmap='nipy_spectral')
     ax = clf.plot_confusion_matrix(self.X,
                                    self.y,
                                    cmap=plt.cm.nipy_spectral)
Example #8
 def test_max_num_features(self):
     np.random.seed(0)
     clf = RandomForestClassifier()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, self.y)
     ax = clf.plot_feature_importances(max_num_features=2)
     ax = clf.plot_feature_importances(max_num_features=4)
     ax = clf.plot_feature_importances(max_num_features=6)
Example #11
 def test_ax(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     fig, ax = plt.subplots(1, 1)
     out_ax = clf.plot_precision_recall_curve(self.X, self.y)
     assert ax is not out_ax
     out_ax = clf.plot_precision_recall_curve(self.X, self.y, ax=ax)
     assert ax is out_ax
Example #13
 def test_invalid_curve_arg(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     self.assertRaises(ValueError,
                       clf.plot_precision_recall_curve,
                       self.X,
                       self.y,
                       curves='zzz')
Example #14
 def test_do_split(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_ks_statistic(self.X, self.y)
     self.assertRaises(AttributeError,
                       clf.plot_ks_statistic,
                       self.X,
                       self.y,
                       do_split=False)
Example #15
 def test_ax(self):
     np.random.seed(0)
     clf = RandomForestClassifier()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, self.y)
     fig, ax = plt.subplots(1, 1)
     out_ax = clf.plot_feature_importances()
     assert ax is not out_ax
     out_ax = clf.plot_feature_importances(ax=ax)
     assert ax is out_ax
Example #16
 def test_do_cv(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y)
     self.assertRaises(NotFittedError,
                       clf.plot_confusion_matrix,
                       self.X,
                       self.y,
                       do_cv=False)
Example #17
 def test_cmap(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_precision_recall_curve(self.X,
                                          self.y,
                                          cmap='nipy_spectral')
     ax = clf.plot_precision_recall_curve(self.X,
                                          self.y,
                                          cmap=plt.cm.nipy_spectral)
Example #18
    def test_true_pred_labels(self):
        np.random.seed(0)
        clf = LogisticRegression()
        scikitplot.classifier_factory(clf)

        true_labels = [0, 1]
        pred_labels = [0, 2]

        ax = clf.plot_confusion_matrix(self.X, self.y, true_labels=true_labels,
                                       pred_labels=pred_labels)
Example #20
    def test_true_pred_labels(self):
        np.random.seed(0)
        clf = LogisticRegression()
        scikitplot.classifier_factory(clf)

        true_labels = [0, 1]
        pred_labels = [0, 2]

        ax = clf.plot_confusion_matrix(self.X,
                                       self.y,
                                       true_labels=true_labels,
                                       pred_labels=pred_labels)
Example #21
    def test_instance_validation(self):

        clf = self.Classifier()
        scikitplot.classifier_factory(clf)

        not_clf = self.NotClassifier()
        self.assertRaises(TypeError, scikitplot.classifier_factory, not_clf)

        partial_clf = self.PartialClassifier()
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            scikitplot.classifier_factory(partial_clf)
            assert len(w) == 2
            assert issubclass(w[-1].category, UserWarning)
            assert " not in clf. Some plots may not be possible to generate." in str(w[-1].message)
Example #22
    def test_instance_validation(self):

        clf = self.Classifier()
        scikitplot.classifier_factory(clf)

        not_clf = self.NotClassifier()
        self.assertRaises(TypeError, scikitplot.classifier_factory, not_clf)

        partial_clf = self.PartialClassifier()
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            scikitplot.classifier_factory(partial_clf)
            assert len(w) == 1
            assert issubclass(w[-1].category, UserWarning)
            assert " not in clf. Some plots may not be possible to generate." in str(w[-1].message)
Example #23
    def test_predict_proba(self):
        np.random.seed(0)

        class DummyClassifier:
            def __init__(self):
                pass

            def fit(self):
                pass

            def predict(self):
                pass

            def score(self):
                pass

        clf = DummyClassifier()
        scikitplot.classifier_factory(clf)
        self.assertRaises(TypeError, clf.plot_ks_statistic, self.X, self.y)
Example #24
    def test_predict_proba(self):
        np.random.seed(0)

        class DummyClassifier:
            def __init__(self):
                pass

            def fit(self):
                pass

            def predict(self):
                pass

            def score(self):
                pass

        clf = DummyClassifier()
        scikitplot.classifier_factory(clf)
        self.assertRaises(TypeError, clf.plot_precision_recall_curve, self.X, self.y)
Example #25
    def test_method_insertion(self):

        clf = self.Classifier()
        scikitplot.classifier_factory(clf)
        assert hasattr(clf, 'plot_learning_curve')
        assert hasattr(clf, 'plot_confusion_matrix')
        assert hasattr(clf, 'plot_roc_curve')
        assert hasattr(clf, 'plot_ks_statistic')
        assert hasattr(clf, 'plot_precision_recall_curve')
        assert hasattr(clf, 'plot_feature_importances')

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            scikitplot.classifier_factory(clf)

            assert len(w) == 7
            for warning in w[1:]:
                assert issubclass(warning.category, UserWarning)
                assert ' method already in clf. ' \
                       'Overriding anyway. This may ' \
                       'result in unintended behavior.' in str(warning.message)
Example #27
 def test_shuffle(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y, shuffle=True)
     ax = clf.plot_confusion_matrix(self.X, self.y, shuffle=False)
Example #29
 def test_two_classes(self):
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     X, y = load_data(return_X_y=True)
     self.assertRaises(ValueError, clf.plot_ks_statistic, X, y)
Example #30
 def test_invalid_curve_arg(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     self.assertRaises(ValueError, clf.plot_precision_recall_curve, self.X, self.y,
                       curves='zzz')
Example #31
 def test_n_jobs(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_learning_curve(self.X, self.y, n_jobs=-1)
Example #32
            'Please specify a valid dataset to use. Options: default, compare')
    # X = normalize(X, norm='l2', axis=1)
    # X = VarianceThreshold(threshold=0.00000005).fit_transform(X)
    # X = SelectKBest(f_classif, k=50).fit_transform(X, Y)
    X = StandardScaler().fit_transform(X)
    print('# features used: %d / %d' % (len(X[0]), x_len_before))

    # clf1 = LogisticRegression(random_state=1, verbose=VERBOSE)
    # clf2 = RandomForestClassifier(random_state=1, verbose=VERBOSE)
    # clf3 = GaussianNB()
    #
    # vc1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    #                          voting='hard')
    # scores = cross_val_score(vc1, X, Y, cv=CROSS_VAL)

    estimators = generate_tuple_lists(CLASSIFIERS, NAMES)
    vc = VotingClassifier(estimators, voting='hard')
    nb = classifier_factory(vc)

    kfold = KFold(n_splits=CROSS_VAL, shuffle=True)

    start_time = time.time()
    scores = cross_val_score(vc, X, Y, cv=kfold, verbose=VERBOSE)
    end_time = time.time()

    print(scores)
    # The mean score and the 95% confidence interval

    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    print("--- %s seconds ---" % (end_time - start_time))
Example #33
 def test_string_classes(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_precision_recall_curve(
         self.X, convert_labels_into_string(self.y))
Example #34
 def test_cmap(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y, cmap='nipy_spectral')
     ax = clf.plot_confusion_matrix(self.X, self.y, cmap=plt.cm.nipy_spectral)
Example #35
"""An example showing the plot_feature_importances method used by a scikit-learn classifier"""
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X, y = load_data(return_X_y=True)
rf = classifier_factory(RandomForestClassifier(random_state=1))
rf.fit(X, y)
rf.plot_feature_importances(feature_names=[
    'sepal length', 'sepal width', 'petal length', 'petal width'
])
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X, y)
skplt.plot_feature_importances(rf,
                               feature_names=[
                                   'sepal length', 'sepal width',
                                   'petal length', 'petal width'
                               ])
plt.show()
Example #36
"""An example showing the plot_ks_statistic method used by a scikit-learn classifier"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer as load_data

X, y = load_data(return_X_y=True)
lr = classifier_factory(LogisticRegression())
lr.plot_ks_statistic(X, y, random_state=1)
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
lr = LogisticRegression()
lr = lr.fit(X, y)
probas = lr.predict_proba(X)
skplt.plot_ks_statistic(y_true=y, y_probas=probas)
plt.show()
Example #37
 def test_train_sizes(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_learning_curve(self.X, self.y, train_sizes=np.linspace(0.1, 1.0, 8))
Example #38
 def test_feature_importances_in_clf(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, self.y)
     self.assertRaises(TypeError, clf.plot_feature_importances)
Example #39
# run clustering on t-SNE
# check feature importance against the 50 other features


# feature importance
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X1, y1 = load_data(return_X_y=True)
print(X1.shape, y1.shape)
# X and Y below refer to the author's own 50-feature dataset, which is not
# defined in this snippet; the shapes are printed for comparison with X1/y1.
print(X.shape, y.shape)
rf = classifier_factory(RandomForestClassifier(random_state=1))
rf.fit(X, Y)
rf.plot_feature_importances(feature_names=["feature" + str(i) for i in range(50)])
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X1, y1)  # fit on the iris data loaded above so the labels below match
skplt.plot_feature_importances(rf, feature_names=['sepal length', 'sepal width',
                                                  'petal length', 'petal width'])
plt.show()
# run PCA with 50 variables and see how much variance the dimensions explain

from sklearn.decomposition import PCA
from sklearn.datasets import load_digits as load_data
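
The snippet breaks off after importing PCA and load_digits. A minimal sketch of the experiment described in the comment above (an assumed intent: fit PCA and check how much variance each component explains) could look like this:

X_digits, y_digits = load_data(return_X_y=True)
pca = PCA(n_components=50)  # 50 components, matching the 50 features used above
pca.fit(X_digits)
print(pca.explained_variance_ratio_)           # variance explained per component
print(pca.explained_variance_ratio_.cumsum())  # cumulative explained variance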
Example #40
 def test_string_classes(self):
     np.random.seed(0)
     clf = RandomForestClassifier()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, convert_labels_into_string(self.y))
     ax = clf.plot_feature_importances()
Example #43
 def test_feature_names(self):
     np.random.seed(0)
     clf = RandomForestClassifier()
     scikitplot.classifier_factory(clf)
     clf.fit(self.X, self.y)
     ax = clf.plot_feature_importances(feature_names=["a", "b", "c", "d"])
Example #44
 def test_do_cv(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y)
     self.assertRaises(NotFittedError, clf.plot_confusion_matrix, self.X, self.y, do_cv=False)
Example #46
 def test_labels(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y, labels=[0, 1, 2])
Example #49
 def test_cv(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_confusion_matrix(self.X, self.y)
     ax = clf.plot_confusion_matrix(self.X, self.y, cv=5)
Example #52
 def test_string_classes(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_precision_recall_curve(self.X, convert_labels_into_string(self.y))
Example #53
 def test_cmap(self):
     np.random.seed(0)
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     ax = clf.plot_precision_recall_curve(self.X, self.y, cmap='nipy_spectral')
     ax = clf.plot_precision_recall_curve(self.X, self.y, cmap=plt.cm.nipy_spectral)
Example #54
"""An example showing the plot_roc_curve method used by a scikit-learn classifier"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits as load_data


X, y = load_data(return_X_y=True)
nb = classifier_factory(GaussianNB())
nb.plot_roc_curve(X, y, random_state=1)
plt.show()
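
Unlike the other example scripts, this one only shows the factory API. Following the same pattern as the plot_ks_statistic script in Example #36, a functions-API version might look like the sketch below (an assumption: scikitplot.plotters.plot_roc_curve accepts y_true and y_probas the way plot_ks_statistic does):

# Using the more flexible functions API (a sketch; assumes
# scikitplot.plotters.plot_roc_curve takes y_true and y_probas)
from scikitplot import plotters as skplt

nb = GaussianNB()
nb = nb.fit(X, y)
probas = nb.predict_proba(X)
skplt.plot_roc_curve(y_true=y, y_probas=probas)
plt.show()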
Example #55
"""An example showing the plot_learning_curve method used by a scikit-learn classifier"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from scikitplot import classifier_factory
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer as load_data

X, y = load_data(return_X_y=True)
rf = classifier_factory(RandomForestClassifier())
rf.plot_learning_curve(X, y)
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt

rf = RandomForestClassifier()
skplt.plot_learning_curve(rf, X, y)
plt.show()