def test_scoring():
    """Check ``paired_ttest_resampled`` with explicit ``scoring`` arguments.

    A logistic regression and a decision tree are fitted on a 75/25 split
    of the data returned by ``iris_data()`` and their hold-out scores are
    sanity-checked.  The two estimators are then compared with
    ``scoring='accuracy'`` and ``scoring='f1_macro'`` against fixed
    reference values for the t statistic and p value.
    """
    X, y = iris_data()
    logreg = LogisticRegression(random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    # Hold-out scores of each estimator on the fixed split.
    logreg_score = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_score = tree.fit(X_train, y_train).score(X_test, y_test)

    assert round(logreg_score, 2) == 0.97
    assert round(tree_score, 2) == 0.95

    # Each entry: (scoring, expected t statistic, expected p value).
    reference = (
        ('accuracy', -1.809, 0.081),
        ('f1_macro', -1.690, 0.102),
    )
    for metric, expected_t, expected_p in reference:
        t, p = paired_ttest_resampled(estimator1=logreg,
                                      estimator2=tree,
                                      X=X,
                                      y=y,
                                      scoring=metric,
                                      random_seed=1)

        assert round(t, 3) == expected_t, t
        assert round(p, 3) == expected_p, p
def test_classifier_defaults():
    """Check ``paired_ttest_resampled`` when no ``scoring`` is passed.

    A liblinear one-vs-rest logistic regression is compared first with an
    unconstrained decision tree and then with a depth-1 decision tree, all
    on the data returned by ``iris_data()``.  The reference values for the
    first comparison depend on whether scikit-learn is older than 0.20.
    """
    X, y = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    split = train_test_split(X, y, test_size=0.25, random_state=123)
    X_train, X_test, y_train, y_test = split

    # Hold-out scores of each estimator on the fixed split.
    logreg_score = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_score = tree.fit(X_train, y_train).score(X_test, y_test)

    assert round(logreg_score, 2) == 0.97
    assert round(tree_score, 2) == 0.95

    t, p = paired_ttest_resampled(estimator1=logreg,
                                  estimator2=tree,
                                  X=X,
                                  y=y,
                                  random_seed=1)

    # Reference values differ between scikit-learn releases.
    if Version(sklearn_version) >= Version("0.20"):
        expected_t, expected_p = -1.702, 0.10
    else:
        expected_t, expected_p = -1.809, 0.081
    assert round(t, 3) == expected_t, t
    assert round(p, 3) == expected_p, p

    # Repeat the comparison against a depth-1 decision tree.
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)

    stump_score = stump.fit(X_train, y_train).score(X_test, y_test)

    assert round(stump_score, 2) == 0.63

    t, p = paired_ttest_resampled(estimator1=logreg,
                                  estimator2=stump,
                                  X=X,
                                  y=y,
                                  random_seed=1)

    assert round(t, 3) == 39.214, t
    assert round(p, 3) == 0.000, p
def test_scoring():
    """Check ``paired_ttest_resampled`` for two explicit ``scoring`` values.

    A liblinear one-vs-rest logistic regression and a decision tree are
    fitted on a 75/25 split of the data returned by ``iris_data()`` and
    their hold-out scores are sanity-checked.  The paired test is then run
    with ``scoring='accuracy'`` and ``scoring='f1_macro'``; the expected
    statistics depend on whether scikit-learn is older than 0.20.
    """
    X, y = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    logreg_score = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_score = tree.fit(X_train, y_train).score(X_test, y_test)

    assert round(logreg_score, 2) == 0.97
    assert round(tree_score, 2) == 0.95

    # Each entry: (scoring, (t, p) for scikit-learn < 0.20,
    #              (t, p) for scikit-learn >= 0.20).
    reference = (
        ('accuracy', (-1.809, 0.081), (-1.702, 0.1)),
        ('f1_macro', (-1.690, 0.102), (-1.561, 0.129)),
    )
    for metric, legacy, current in reference:
        t, p = paired_ttest_resampled(estimator1=logreg,
                                      estimator2=tree,
                                      X=X,
                                      y=y,
                                      scoring=metric,
                                      random_seed=1)

        is_legacy = Version(sklearn_version) < Version('0.20')
        expected_t, expected_p = legacy if is_legacy else current
        assert round(t, 3) == expected_t, t
        assert round(p, 3) == expected_p, p
def test_classifier_defaults():
    """Check ``paired_ttest_resampled`` without an explicit ``scoring``.

    The logistic regression (liblinear, one-vs-rest) is paired first with
    an unconstrained decision tree and then with a depth-1 decision tree on
    the data returned by ``iris_data()``.  Expected statistics for the
    first pairing are selected by the installed scikit-learn version.
    """
    X, y = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def compare_with(other):
        """Run the resampled paired t-test of ``logreg`` against ``other``."""
        return paired_ttest_resampled(estimator1=logreg,
                                      estimator2=other,
                                      X=X, y=y,
                                      random_seed=1)

    logreg_score = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_score = tree.fit(X_train, y_train).score(X_test, y_test)

    assert round(logreg_score, 2) == 0.97
    assert round(tree_score, 2) == 0.95

    t, p = compare_with(tree)

    # Reference values differ between scikit-learn releases.
    legacy_sklearn = Version(sklearn_version) < Version("0.20")
    expected_t = -1.809 if legacy_sklearn else -1.702
    expected_p = 0.081 if legacy_sklearn else 0.10
    assert round(t, 3) == expected_t, t
    assert round(p, 3) == expected_p, p

    # Swap in a decision tree limited to max_depth=1.
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)

    stump_score = stump.fit(X_train, y_train).score(X_test, y_test)

    assert round(stump_score, 2) == 0.63

    t, p = compare_with(stump)

    assert round(t, 3) == 39.214, t
    assert round(p, 3) == 0.000, p
def test_scoring():
    """Check ``paired_ttest_resampled`` with ``accuracy`` and ``f1_macro``.

    Both estimators (a liblinear one-vs-rest logistic regression and a
    decision tree) are fitted on a 75/25 split of the data returned by
    ``iris_data()`` and sanity-checked before the paired test is run once
    per scoring value.  Reference statistics are chosen according to
    whether scikit-learn is older than 0.20.
    """
    X, y = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    split = train_test_split(X, y, test_size=0.25, random_state=123)
    X_train, X_test, y_train, y_test = split

    def pick(legacy, current):
        """Return the (t, p) pair matching the installed scikit-learn."""
        if Version(sklearn_version) < Version('0.20'):
            return legacy
        return current

    logreg_score = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_score = tree.fit(X_train, y_train).score(X_test, y_test)

    assert round(logreg_score, 2) == 0.97
    assert round(tree_score, 2) == 0.95

    # Accuracy-based comparison.
    t, p = paired_ttest_resampled(estimator1=logreg,
                                  estimator2=tree,
                                  X=X, y=y,
                                  scoring='accuracy',
                                  random_seed=1)

    expected_t, expected_p = pick((-1.809, 0.081), (-1.702, 0.1))
    assert round(t, 3) == expected_t, t
    assert round(p, 3) == expected_p, p

    # Macro-averaged F1 comparison.
    t, p = paired_ttest_resampled(estimator1=logreg,
                                  estimator2=tree,
                                  X=X, y=y,
                                  scoring='f1_macro',
                                  random_seed=1)

    expected_t, expected_p = pick((-1.690, 0.102), (-1.561, 0.129))
    assert round(t, 3) == expected_t, t
    assert round(p, 3) == expected_p, p
def test_regressor():
    """Check ``paired_ttest_resampled`` on two regressors.

    ``Lasso`` and ``Ridge`` are fitted on a 75/25 split of the data
    returned by ``boston_housing_data()``, their hold-out scores are
    sanity-checked, and the paired test result is compared against fixed
    reference values.
    """
    X, y = boston_housing_data()
    lasso = Lasso(random_state=1)
    ridge = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    # Hold-out scores of each regressor on the fixed split.
    lasso_score = lasso.fit(X_train, y_train).score(X_test, y_test)
    ridge_score = ridge.fit(X_train, y_train).score(X_test, y_test)

    assert round(lasso_score, 2) == 0.66, lasso_score
    assert round(ridge_score, 2) == 0.68, ridge_score

    t, p = paired_ttest_resampled(
        estimator1=lasso,
        estimator2=ridge,
        X=X, y=y,
        random_seed=1,
    )

    assert round(t, 3) == -7.697, t
    assert round(p, 3) == 0.000, p
# Report the t statistic and p value, each on its own line with 3 decimals.
print('t statistic: %.3f' % t, 'p value: %.3f' % p, sep='\n')

t statistic: -4.961
p value: 0.001

#RESAMPLED PAIRED T TEST (7,8)
import time

from mlxtend.evaluate import paired_ttest_resampled

# Compare classifier_lgbm_7 and classifier_lgbm_8 with the resampled paired
# t-test, scoring by Matthews correlation coefficient, and time the run.
# time.perf_counter() is monotonic, so the measured duration cannot be skewed
# by system clock adjustments during a long run (time.time() can be).
start = time.perf_counter()

t, p = paired_ttest_resampled(estimator1=classifier_lgbm_7,
                              estimator2=classifier_lgbm_8,
                              X=X,
                              y=Y,
                              scoring=make_scorer(matthews_corrcoef),
                              random_seed=42)

end = time.perf_counter()
# Elapsed time is reported in minutes.
print("Tempo de Execução: {:.2f} min".format((end - start)/60))

Tempo de Execução: 611.85 min

# Show the t statistic and p value of the comparison above, 3 decimals each.
print('t statistic: %.3f' % t, 'p value: %.3f' % p, sep='\n')

t statistic: -1.511
p value: 0.142

#RESAMPLED PAIRED T TEST (8, 28)