# imports shared by the test functions below
from mlxtend.evaluate import paired_ttest_resampled
from mlxtend.data import iris_data, boston_housing_data
from sklearn.linear_model import LogisticRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import __version__ as sklearn_version
from packaging.version import Version


def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  scoring='accuracy',
                                  random_seed=1)

    assert round(t, 3) == -1.809, t
    assert round(p, 3) == 0.081, p

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  scoring='f1_macro',
                                  random_seed=1)

    assert round(t, 3) == -1.690, t
    assert round(p, 3) == 0.102, p
def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(multi_class='ovr',
                              solver='liblinear',
                              random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  random_seed=1)

    if Version(sklearn_version) < Version("0.20"):
        assert round(t, 3) == -1.809, t
        assert round(p, 3) == 0.081, p
    else:
        assert round(t, 3) == -1.702, t
        assert round(p, 3) == 0.10, p

    # change max_depth of the decision tree classifier
    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  random_seed=1)

    assert round(t, 3) == 39.214, t
    assert round(p, 3) == 0.000, p
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(multi_class='ovr',
                              solver='liblinear',
                              random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  scoring='accuracy',
                                  random_seed=1)

    if Version(sklearn_version) < Version('0.20'):
        assert round(t, 3) == -1.809, t
        assert round(p, 3) == 0.081, p
    else:
        assert round(t, 3) == -1.702, t
        assert round(p, 3) == 0.1, p

    t, p = paired_ttest_resampled(estimator1=clf1,
                                  estimator2=clf2,
                                  X=X, y=y,
                                  scoring='f1_macro',
                                  random_seed=1)

    if Version(sklearn_version) < Version("0.20"):
        assert round(t, 3) == -1.690, t
        assert round(p, 3) == 0.102, p
    else:
        assert round(t, 3) == -1.561, t
        assert round(p, 3) == 0.129, p
def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    t, p = paired_ttest_resampled(estimator1=reg1,
                                  estimator2=reg2,
                                  X=X, y=y,
                                  random_seed=1)

    assert round(t, 3) == -7.697, t
    assert round(p, 3) == 0.000, p
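The tests above only assert rounded values; when calling paired_ttest_resampled outside a test suite, the returned t statistic and p value are typically compared against a significance level to decide whether the two models differ. A minimal sketch of that decision step on the same iris setup (alpha=0.05 is an illustrative choice, not something fixed by the library):

from mlxtend.data import iris_data
from mlxtend.evaluate import paired_ttest_resampled
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = iris_data()
clf1 = LogisticRegression(random_state=1)
clf2 = DecisionTreeClassifier(random_state=1)

# Repeatedly resplit the data, score both classifiers, and t-test
# the paired score differences across the resampling rounds.
t, p = paired_ttest_resampled(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              random_seed=1)

alpha = 0.05  # illustrative significance level
if p < alpha:
    print('Reject H0: performance difference is significant '
          '(t=%.3f, p=%.3f)' % (t, p))
else:
    print('Fail to reject H0: no significant difference '
          '(t=%.3f, p=%.3f)' % (t, p))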
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: -4.961
p value: 0.001

# RESAMPLED PAIRED T TEST (7, 8)
from mlxtend.evaluate import paired_ttest_resampled
from sklearn.metrics import make_scorer, matthews_corrcoef
import time

start = time.time()
t, p = paired_ttest_resampled(
    estimator1=classifier_lgbm_7,
    estimator2=classifier_lgbm_8,
    X=X,
    y=Y,
    scoring=make_scorer(matthews_corrcoef),
    random_seed=42
)
end = time.time()
print("Execution time: {:.2f} min".format((end - start) / 60))

Execution time: 611.85 min

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: -1.511
p value: 0.142

# RESAMPLED PAIRED T TEST (8, 28)
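As an aside before the next comparison: the roughly 612-minute runtime above comes from paired_ttest_resampled refitting both LightGBM models on every resampling round (the mlxtend defaults are num_rounds=30 and test_size=0.3). For quick exploratory checks the number of rounds can be lowered, at the cost of a weaker test; a hedged sketch, assuming classifier_lgbm_7, classifier_lgbm_8, X, and Y are defined as in the notebook above:

from mlxtend.evaluate import paired_ttest_resampled
from sklearn.metrics import make_scorer, matthews_corrcoef

# Fewer resampling rounds means fewer refits of each model and a
# proportionally shorter runtime, but also less reliable statistics.
t, p = paired_ttest_resampled(
    estimator1=classifier_lgbm_7,
    estimator2=classifier_lgbm_8,
    X=X,
    y=Y,
    num_rounds=10,  # default: 30
    scoring=make_scorer(matthews_corrcoef),
    random_seed=42
)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

Lowering num_rounds is only a shortcut for exploratory runs; results intended for reporting, like the ones above, should keep the default.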