def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25, random_state=123)
    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)
    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='accuracy',
                                 random_seed=1)
    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='f1_macro',
                                 random_seed=1)
    assert round(t, 3) == -1.872, t
    assert round(p, 3) == 0.094, p
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1, solver='liblinear', multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.5, random_state=123)
    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)
    assert round(score1, 2) == 0.96, round(score1, 2)
    assert round(score2, 2) == 0.91, round(score2, 2)

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='accuracy',
                                 random_seed=1)
    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='recall_micro',
                                 random_seed=1)
    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p
def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1, multi_class='ovr', solver='liblinear')
    clf2 = DecisionTreeClassifier(random_state=1)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25, random_state=123)
    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)
    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)
    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # change max_depth of the decision tree classifier
    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)
    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)
    assert round(score3, 2) == 0.63

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)
    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p
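# For reference, the statistic exercised by the tests above is the k-fold
# cross-validated paired t-test (Dietterich, 1998): both estimators are fit and
# scored on the same k folds, and a paired t-test with k-1 degrees of freedom is
# applied to the per-fold score differences. The sketch below only illustrates
# that idea; it is not mlxtend's exact implementation, and the KFold settings
# used here (shuffle=True, k=10) are assumptions made for the example.
import numpy as np
from scipy import stats
from sklearn.base import clone
from sklearn.model_selection import KFold


def kfold_paired_ttest_sketch(est1, est2, X, y, k=10, seed=1):
    kf = KFold(n_splits=k, shuffle=True, random_state=seed)
    diffs = []
    for train_idx, test_idx in kf.split(X):
        # Score both estimators on the identical train/test split.
        s1 = clone(est1).fit(X[train_idx], y[train_idx]).score(X[test_idx], y[test_idx])
        s2 = clone(est2).fit(X[train_idx], y[train_idx]).score(X[test_idx], y[test_idx])
        diffs.append(s1 - s2)
    diffs = np.asarray(diffs)
    # Paired t-test on the fold-wise score differences, k-1 degrees of freedom.
    t = diffs.mean() * np.sqrt(k) / diffs.std(ddof=1)
    p = 2.0 * stats.t.sf(np.abs(t), df=k - 1)
    return t, p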
def statistical_significance_tests(self):
    print("============================================================")
    print("============================================================")
    print("K-fold cross-validated paired t-test:")
    print()
    t, p = paired_ttest_kfold_cv(estimator1=self.svm_model,
                                 estimator2=self.rf_model,
                                 X=self.pre.x, y=self.pre.y,
                                 random_seed=42)
    print('t statistic: %.3f' % t)
    print('p value: %.3f' % p)
def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25, random_state=123)
    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)
    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    t, p = paired_ttest_kfold_cv(estimator1=reg1,
                                 estimator2=reg2,
                                 X=X, y=y,
                                 random_seed=1)
    assert round(t, 3) == -0.549, t
    assert round(p, 3) == 0.596, p
# imports needed by this snippet
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from mlxtend.evaluate import paired_ttest_kfold_cv

temp_ = t10[['State', 'ReqMem', 'Timelimit', 'role']]
newdf_ = temp_
fdf_ = newdf_

# classification for State = 1, failed
xt_ = fdf_[['State', 'ReqMem', 'Timelimit', 'role']]
xt_ = xt_.fillna(0)            # fillna returns a new DataFrame, so keep the result
a_ = xt_.sample(frac=0.1)      # work on a 10% sample
xt_ = preprocessing.StandardScaler().fit_transform(a_)
x_ = xt_[:, 1:3]               # features: ReqMem, Timelimit
y_ = xt_[:, 0]                 # target: State
y_ = y_.astype('int')

clf2 = LogisticRegression()
clf1 = GaussianNB()

t, p = paired_ttest_kfold_cv(estimator1=clf1,
                             estimator2=clf2,
                             X=x_, y=y_,    # the prepared arrays, not undefined x/y
                             random_seed=1)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: -3.605
p value: 0.015

# K-FOLD CROSS-VALIDATED PAIRED T TEST (7,8)
from mlxtend.evaluate import paired_ttest_kfold_cv
from sklearn.metrics import make_scorer, matthews_corrcoef
import time

start = time.time()
t, p = paired_ttest_kfold_cv(estimator1=classifier_lgbm_7,
                             estimator2=classifier_lgbm_8,
                             X=X, y=Y,
                             scoring=make_scorer(matthews_corrcoef),
                             random_seed=42)
end = time.time()
print("Execution time: {:.2f} min".format((end - start) / 60))

Execution time: 246.89 min

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: 0.238
p value: 0.817

# K-FOLD CROSS-VALIDATED PAIRED T TEST (8,28)
lw = 2
plt.figure()
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)
plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'red', 'green', 'yellow'])
for i, color in zip(range(len(classes)), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'.format(classes[i], roc_auc[i]))

plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()

t, p = paired_ttest_kfold_cv(estimator1=NB,
                             estimator2=DT,
                             X=X, y=y,
                             random_seed=1)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)
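# A common way to read the t/p pairs printed in the snippets above, assuming the
# conventional significance level alpha = 0.05 (the threshold is a choice made
# here for illustration, not part of the paired_ttest_kfold_cv API):
alpha = 0.05
if p <= alpha:
    print('Reject H0: the two models perform significantly differently')
else:
    print('Fail to reject H0: no significant difference between the two models')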