Exemple #1
0
def test_632plus():
    """Check '.632+' bootstrap accuracy for a full and a depth-1 tree."""
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    # Compare the number of scores itself; `len(scores == 200)` only took
    # the length of an elementwise comparison and never checked the count.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.96528, np.round(acc, 5)

    tree2 = DecisionTreeClassifier(random_state=123, max_depth=1)
    scores = bootstrap_point632_score(tree2, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.65034, np.round(acc, 5)
def test_632plus():
    """Check '.632+' bootstrap accuracy for a full and a depth-1 tree."""
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    # The count check must wrap only `scores` in len(); comparing inside
    # len() yields a truthy length and asserts nothing about the count.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.96528, np.round(acc, 5)

    tree2 = DecisionTreeClassifier(random_state=123, max_depth=1)
    scores = bootstrap_point632_score(tree2, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.65034, np.round(acc, 5)
Exemple #3
0
    def bootstrap_632(self):
        """Run .632 bootstrap validation on the tuned estimators.

        For every index from 1 up to ``len(self.grid) - 1`` the
        ``best_estimator_`` of ``self.grid[index]`` is scored with
        ``bootstrap_point632_score`` (1000 splits, method '.632', seed 42)
        on ``self.X.values`` / ``self.y.values``.  Each run appends
        ``[mean accuracy %, [2.5th percentile %, 97.5th percentile %],
        elapsed seconds]`` to ``self.bootstrap_results`` and prints a
        one-line summary.  Index 0 of ``self.grid`` is not evaluated.
        """
        print(self.problem_name)

        for idx in range(1, len(self.grid)):
            t0 = time.time()

            estimator = self.grid[idx].best_estimator_
            features = self.X.values
            targets = self.y.values
            boot_scores = bootstrap_point632_score(
                estimator, features, targets,
                n_splits=1000, method='.632', random_seed=42,
            )

            mean_acc = np.mean(boot_scores)
            ci_low = np.percentile(boot_scores, 2.5)
            ci_high = np.percentile(boot_scores, 97.5)
            elapsed = np.round(time.time() - t0, 2)

            # Stored values are percentages rounded to two decimals.
            record = [
                np.round(100*mean_acc, 2),
                [np.round(100*ci_low, 2), np.round(100*ci_high, 2)],
                elapsed,
            ]
            self.bootstrap_results.append(record)

            label = self.classifiers[idx].upper()
            acc_text = ' acc: %.2f%%' % (100*mean_acc)
            ci_text = ' 95%% Confidence interval: [%.2f, %.2f]' % (
                100*ci_low, 100*ci_high)
            print(label, acc_text, ci_text, ' time', elapsed)
    def bootstrap_632(self, n_splits=None):
        """
        Performs .632 bootstrap validation for every entry of ``self.grid``.

        1000 iterations are good for small datasets; for large datasets
        bootstrapping can be skipped.

        Parameters
        ----------
        n_splits : int or None
            Number of bootstrap splits passed to
            ``bootstrap_point632_score``.  When ``None``,
            ``self.n_bootstrap_splits`` is used.

        Side effects
        ------------
        Resets ``self.bootstrap_results`` and then appends, per grid key,
        ``[mean accuracy %, [2.5th percentile %, 97.5th percentile %],
        elapsed seconds]``.  Prints ``self.problem_name`` and one summary
        line per key.
        """
        # Identity test: `== None` can be hijacked by objects overriding __eq__.
        if n_splits is None:
            n_splits = self.n_bootstrap_splits

        print (self.problem_name)
        self.bootstrap_results = []
        for key in self.grid.keys():
            start_time = time.time()

            # NumPy arrays are passed through as-is; anything else is
            # expected to expose the underlying array via `.values`.
            if isinstance(self.X, np.ndarray):
                X_data, y_data = self.X, self.y
            else:
                X_data, y_data = self.X.values, self.y.values

            scores = bootstrap_point632_score(
                self.grid[key].best_estimator_, X_data,
                y_data, n_splits=n_splits,
                method='.632', random_seed=self.random_state
            )

            acc = np.mean(scores)
            lower = np.percentile(scores, 2.5)
            upper = np.percentile(scores, 97.5)
            end_time = np.round(time.time() - start_time, 2)
            self.bootstrap_results.append([np.round(100*acc, 2),
                                           [np.round(100*lower, 2),
                                            np.round(100*upper, 2)],
                                           end_time])
            print(
                key,
                ' acc: %.2f%%' % (100*acc),
                ' 95%% Confidence interval: [%.2f, %.2f]' % \
                    (100*lower, 100*upper),
                ' time', end_time
            )
Exemple #5
0
def test_scoring():
    """Check that ``scoring='f1'`` yields a mean F1 of 1.0 on X[:100]."""
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring='f1',
                                      random_seed=123)
    f1 = np.mean(scores)
    # len() must wrap `scores` alone; `len(scores == 200)` never
    # compared the number of scores against 200.
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_scoring():
    """Check that ``scoring='f1'`` yields a mean F1 of 1.0 on X[:100]."""
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring='f1',
                                      random_seed=123)
    f1 = np.mean(scores)
    # Assert on the score count itself rather than on the length of an
    # elementwise comparison, which is truthy for any non-empty result.
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_scoring():
    """Check that ``scoring_func=f1_score`` yields a mean F1 of 1.0."""
    from sklearn.metrics import f1_score
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring_func=f1_score,
                                      random_seed=123)
    f1 = np.mean(scores)
    # Compare the number of scores to 200; the misplaced parenthesis in
    # `len(scores == 200)` made the assertion vacuous.
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
Exemple #8
0
def test_scoring():
    """Check that ``scoring_func=f1_score`` yields a mean F1 of 1.0."""
    from sklearn.metrics import f1_score
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring_func=f1_score,
                                      random_seed=123)
    f1 = np.mean(scores)
    # The count check belongs outside len(): `len(scores == 200)` measured
    # the comparison result, not whether 200 scores were produced.
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_scoring():
    """Check that ``scoring='f1'`` yields a mean F1 of 1.0 on X[:100]."""
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr,
                                      X[:100],
                                      y[:100],
                                      scoring='f1',
                                      random_seed=123)
    f1 = np.mean(scores)
    # Check the number of scores directly; the comparison must not sit
    # inside len(), where it only produces a truthy length.
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
Exemple #10
0
def test_oob():
    """Check the 'oob' bootstrap accuracy of a decision tree."""
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree,
                                      X,
                                      y,
                                      random_seed=123,
                                      method='oob')
    acc = np.mean(scores)
    # Compare the score count to 200; `len(scores == 200)` only took the
    # length of an elementwise comparison.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.94667, np.round(acc, 5)
Exemple #11
0
def test_custom_accuracy():
    """Check that a user-supplied accuracy function is used for scoring."""

    def accuracy2(targets, predictions):
        # Fraction of positions where target and prediction agree.
        return sum(i == j for i, j in
                   zip(targets, predictions)) / len(targets)
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y,
                                      random_seed=123,
                                      scoring_func=accuracy2)
    acc = np.mean(scores)
    # Assert on the number of scores; with the comparison inside len()
    # the count was never actually checked.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
def test_custom_accuracy():
    """Check that a user-supplied accuracy function is used for scoring."""

    def accuracy2(targets, predictions):
        # Fraction of positions where target and prediction agree.
        return sum(i == j for i, j in
                   zip(targets, predictions)) / len(targets)
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y,
                                      random_seed=123,
                                      scoring_func=accuracy2)
    acc = np.mean(scores)
    # len() wraps `scores` only, so the count is really compared to 200
    # (the original `len(scores == 200)` could not detect a wrong count).
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
Exemple #13
0
def test_scoring_proba():
    """Check ROC-AUC scoring with ``predict_proba=True``.

    Also checks that a ``RuntimeError`` is raised when ``FakeClassifier``
    is scored with ``predict_proba=True``.
    """
    from sklearn.metrics import roc_auc_score
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')

    # test predict_proba
    scores = bootstrap_point632_score(lr,
                                      X[:100],
                                      y[:100],
                                      scoring_func=roc_auc_score,
                                      predict_proba=True,
                                      random_seed=123)
    roc_auc = np.mean(scores)
    # Compare the score count to 200; `len(scores == 200)` only measured
    # the length of an elementwise comparison.
    assert len(scores) == 200
    assert np.round(roc_auc, 2) == 1.0, roc_auc

    # Build the classifier outside the raises-block so that only the
    # scoring call can satisfy the expected RuntimeError.
    clf = FakeClassifier()
    with pytest.raises(RuntimeError):
        bootstrap_point632_score(clf,
                                 X[:100],
                                 y[:100],
                                 scoring_func=roc_auc_score,
                                 predict_proba=True,
                                 random_seed=123)
Exemple #14
0
def compute_acc_with_ci(clf, X_test, y_test):
    """Return the '.632+' bootstrap scores of ``clf`` on the given data.

    The scores come from ``bootstrap_point632_score`` with 500 splits,
    ``clone_estimator=True`` and ``balanced_accuracy_score`` as the scoring
    function.  The raw scores are returned; no summary statistic or
    interval is computed here.
    """
    from mlxtend.evaluate import bootstrap_point632_score
    from sklearn.metrics import balanced_accuracy_score

    bootstrap_options = {
        "n_splits": 500,
        "method": ".632+",
        "clone_estimator": True,
        "scoring_func": balanced_accuracy_score,
    }
    return bootstrap_point632_score(clf, X=X_test, y=y_test,
                                    **bootstrap_options)
Exemple #15
0
def test_pandas_pass():
    """Smoke test: pandas inputs are accepted by every bootstrap method.

    Wraps ``X`` in a DataFrame and ``y`` in a Series and runs the 'oob',
    '.632' and '.632+' methods in that order; nothing is asserted beyond
    the calls completing.
    """
    frame = pd.DataFrame(X)
    labels = pd.Series(y)
    tree = DecisionTreeClassifier(random_state=123)
    for bootstrap_method in ('oob', '.632', '.632+'):
        bootstrap_point632_score(tree, frame, labels,
                                 random_seed=123, method=bootstrap_method)
def test_defaults():
    """Check the mean score of the default bootstrap settings."""
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    # Compare the number of scores to 200; `len(scores == 200)` never
    # checked the count.
    assert len(scores) == 200
    assert np.round(acc, 2) == 0.95
Exemple #17
0
def test_defaults():
    """Check the mean score of the default bootstrap settings."""
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    # len() must wrap `scores` alone so the count is really compared
    # against 200.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
def test_defaults():
    """Check the mean score of the default bootstrap settings."""
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    # Assert on the score count itself; the comparison inside len() made
    # the original check vacuous.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
    s = np.random.randint(X.shape[0], size=X.shape[0])
    B.append(X[s].mean())

# Interval of the form p +/- z * se, where se is the standard deviation of
# the values in B.
# NOTE(review): p, z, B and alpha are not defined in this excerpt; B is
# presumably the list of resampled means built just above — confirm.
se = np.sqrt(np.var(B))
Cn = (p - z * se, p + z * se)
_ = ["%0.4f" % x for x in Cn]  # both bounds formatted with four decimals
print(_)

# Percentile interval read directly from the values in B.
# NOTE(review): the alpha and (1 - alpha) percentiles leave alpha in each
# tail, i.e. 1 - 2 * alpha coverage — confirm this is the intended level.
Cn = (np.percentile(B, alpha * 100), np.percentile(B, (1 - alpha) * 100))
_ = ["%0.4f" % x for x in Cn]
print(_)

# Same percentile interval, this time over the 500 scores returned by
# bootstrap_point632_score for a GaussianNB fitted on the iris data.
from mlxtend.evaluate import bootstrap_point632_score
X, y = load_iris(return_X_y=True)
cl = GaussianNB()
B = bootstrap_point632_score(cl, X, y, n_splits=500)
Cn = (np.percentile(B, alpha * 100), np.percentile(B, (1 - alpha) * 100))
_ = ["%0.4f" % x for x in Cn]
print(_)

### macro Recall
from scipy.stats import norm
import numpy as np
from sklearn.datasets import load_iris
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score

# NOTE(review): presumably the tail level for the percentile intervals of
# this script (0.05 -> 5th / 95th percentiles) — confirm against its users.
alpha = 0.05
def test_oob():
    """Check the 'oob' bootstrap accuracy of a decision tree."""
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree, X, y, random_seed=123, method='oob')
    acc = np.mean(scores)
    # Compare the score count to 200; `len(scores == 200)` only took the
    # length of an elementwise comparison.
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.94667, np.round(acc, 5)
def test_defaults():
    """Check the mean score of the default bootstrap settings."""
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    # The count check must wrap only `scores` in len(); the original
    # `len(scores == 200)` asserted nothing about the number of scores.
    assert len(scores) == 200
    assert np.round(acc, 2) == 0.95