def test_classifier_defaults():
    """Pin combined_ftest_5x2cv results on iris when no scoring is passed.

    A logistic regression is compared against a decision tree twice: first
    with the tree's default depth, then with ``max_depth=1``. Hold-out
    scores and the returned (f, p) pairs are checked against fixed values.
    """
    features, labels = iris_data()

    logreg = LogisticRegression(random_state=1,
                                multi_class='ovr',
                                solver='liblinear')
    tree = DecisionTreeClassifier(random_state=1)

    split = train_test_split(features, labels,
                             test_size=0.25, random_state=123)
    X_train, X_test, y_train, y_test = split

    # Sanity-check both models on a single hold-out split first.
    logreg_holdout = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_holdout = tree.fit(X_train, y_train).score(X_test, y_test)
    assert round(logreg_holdout, 2) == 0.97
    assert round(tree_holdout, 2) == 0.95

    f_stat, p_value = combined_ftest_5x2cv(estimator1=logreg,
                                           estimator2=tree,
                                           X=features, y=labels,
                                           random_seed=1)
    assert round(f_stat, 3) == 1.053, f_stat
    assert round(p_value, 3) == 0.509, p_value

    # Repeat with a depth-1 tree as the second estimator.
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump_holdout = stump.fit(X_train, y_train).score(X_test, y_test)
    assert round(stump_holdout, 2) == 0.63

    f_stat, p_value = combined_ftest_5x2cv(estimator1=logreg,
                                           estimator2=stump,
                                           X=features, y=labels,
                                           random_seed=1)
    assert round(f_stat, 3) == 34.934, f_stat
    assert round(p_value, 3) == 0.001, p_value
def test_scoring():
    """Pin combined_ftest_5x2cv results on iris for explicit scoring names.

    The same logistic-regression / decision-tree pair is evaluated with
    ``scoring='accuracy'`` and ``scoring='f1_macro'``; the expected values
    for the latter depend on the installed scikit-learn version.
    """
    features, labels = iris_data()

    logreg = LogisticRegression(random_state=1,
                                solver='liblinear',
                                multi_class='ovr')
    tree = DecisionTreeClassifier(random_state=1)

    split = train_test_split(features, labels,
                             test_size=0.25, random_state=123)
    X_train, X_test, y_train, y_test = split

    # Sanity-check both models on a single hold-out split first.
    logreg_holdout = logreg.fit(X_train, y_train).score(X_test, y_test)
    tree_holdout = tree.fit(X_train, y_train).score(X_test, y_test)
    assert round(logreg_holdout, 2) == 0.97
    assert round(tree_holdout, 2) == 0.95

    f_stat, p_value = combined_ftest_5x2cv(estimator1=logreg,
                                           estimator2=tree,
                                           X=features, y=labels,
                                           scoring='accuracy',
                                           random_seed=1)
    assert round(f_stat, 3) == 1.053, f_stat
    assert round(p_value, 3) == 0.509, p_value

    f_stat, p_value = combined_ftest_5x2cv(estimator1=logreg,
                                           estimator2=tree,
                                           X=features, y=labels,
                                           scoring='f1_macro',
                                           random_seed=1)

    # Reference values differ between scikit-learn releases before and
    # from 0.20 onwards.
    if Version(sklearn_version) < Version('0.20'):
        expected_f, expected_p = -1.510, 0.191
    else:
        expected_f, expected_p = 1.046, 0.513

    assert round(f_stat, 3) == expected_f, f_stat
    assert round(p_value, 3) == expected_p, p_value
def display_f_test(inputs_from_feature_selection, dataset):
    """Print the 5x2cv combined F-test result for three model pairings.

    Args:
        inputs_from_feature_selection: Feature matrix handed to
            ``combined_ftest_5x2cv`` as its third positional argument.
        dataset: Object whose ``target`` attribute is handed over as the
            fourth positional argument.

    Each pairing builds fresh model wrappers and prints whatever
    ``combined_ftest_5x2cv`` returns, prefixed by a label.
    """
    # (label, factory for first estimator, factory for second estimator).
    # Factories keep construction lazy so every pairing gets new instances,
    # created right before its test runs.
    pairings = (
        (" NN, KNN: ", lambda: NeuralNet(10000).clf, lambda: Knn(2).clf),
        (" NN, SVM: ", lambda: NeuralNet(10000).clf, lambda: Svm().clf),
        (" KNN, SVM: ", lambda: Knn(2).clf, lambda: Svm().clf),
    )

    print("ftest results: (f, p) =")
    for label, build_first, build_second in pairings:
        outcome = combined_ftest_5x2cv(build_first(),
                                       build_second(),
                                       inputs_from_feature_selection,
                                       dataset.target,
                                       random_seed=1)
        print(label, outcome)
def test_regressor():
    """Pin combined_ftest_5x2cv results for Lasso vs. Ridge on Boston data.

    Both regressors are first scored on a single hold-out split, then
    compared with the combined F-test; all values are checked against
    fixed references.
    """
    features, targets = boston_housing_data()

    lasso = Lasso(random_state=1)
    ridge = Ridge(random_state=1)

    split = train_test_split(features, targets,
                             test_size=0.25, random_state=123)
    X_train, X_test, y_train, y_test = split

    lasso_holdout = lasso.fit(X_train, y_train).score(X_test, y_test)
    ridge_holdout = ridge.fit(X_train, y_train).score(X_test, y_test)
    assert round(lasso_holdout, 2) == 0.66, lasso_holdout
    assert round(ridge_holdout, 2) == 0.68, ridge_holdout

    f_stat, p_value = combined_ftest_5x2cv(estimator1=lasso,
                                           estimator2=ridge,
                                           X=features, y=targets,
                                           random_seed=1)
    assert round(f_stat, 3) == 3.211, f_stat
    assert round(p_value, 3) == 0.105, p_value
p_valor_teste_reg = []

from mlxtend.evaluate import combined_ftest_5x2cv
import time

# Run the 5x2cv combined F-test for every (A, B) combination of regressors
# whose names differ, scoring with custom_r2_by_correlation, and collect the
# model names together with the resulting f and p values.
# NOTE(review): model_reg_A, model_reg_B and f_valor_teste_reg are appended to
# below but are not created in this snippet -- presumably they are initialised
# as empty lists just before it; confirm.
start = time.time()

for model_reg_name_A in models_reg_A:
    model_reg_tst_A = models_reg_A[model_reg_name_A]

    for model_reg_name_B in models_reg_B:
        if model_reg_name_A != model_reg_name_B:
            model_reg_tst_B = models_reg_B[model_reg_name_B]
            print(model_reg_name_A, model_reg_name_B)

            f, p = combined_ftest_5x2cv(
                estimator1=model_reg_tst_A,
                estimator2=model_reg_tst_B,
                X=X_reg,
                y=Y_reg,
                scoring=custom_r2_by_correlation,
                random_seed=42,
            )

            model_reg_A.append(model_reg_name_A)
            model_reg_B.append(model_reg_name_B)
            f_valor_teste_reg.append(f)
            p_valor_teste_reg.append(p)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
# Recorded output of a previous run: execution time 826.12 min.
# (Kept as a comment; as bare text it is a syntax error.)

Resultado = pd.DataFrame({
    'Modelo A': model_reg_A,
    'Modelo B': model_reg_B,
    'Valor p': p_valor_teste_reg,
    'Valor f': f_valor_teste_reg,
})

# Create a pandas Excel writer using XlsxWriter as the engine.
resultado_combined_ftest_regressao = pd.ExcelWriter(
    'V5/resultado_combined_ftest_r2_score_xgboost_regressor.xlsx',
    engine='xlsxwriter',
)
from sklearn.metrics import make_scorer from sklearn.metrics import matthews_corrcoef import time start = time.time() for model_name_A in models_A: model_tst_A = models_A[model_name_A] for model_name_B in models_B: if model_name_A != model_name_B: model_tst_B = models_B[model_name_B] print(model_name_A, model_name_B) f, p = combined_ftest_5x2cv(estimator1=model_tst_A, estimator2=model_tst_B, X=X, y=Y, scoring=make_scorer(matthews_corrcoef), random_seed=42) model_A.append(model_name_A) model_B.append(model_name_B) f_valor_teste.append(f) p_valor_teste.append(p) end = time.time() print("Tempo de Execução: {:.2f} min".format((end - start) / 60)) #Tempo de Execução: 751.73 min Resultado = pd.DataFrame({ 'Modelo A': model_A, 'Modelo B': model_B,
from mlxtend.evaluate import combined_ftest_5x2cv
from sklearn.metrics import mean_squared_error
import time

# Run the 5x2cv combined F-test for every (A, B) combination of regressors
# whose names differ, scoring with 'neg_mean_squared_error', and collect the
# model names together with the resulting f and p values.
# NOTE(review): the four result lists appended to below are not created in
# this snippet -- presumably they are (re)initialised as empty lists before
# it runs; if not, results from an earlier comparison would be mixed in.
start = time.time()

for model_reg_name_A in models_reg_A:
    model_reg_tst_A = models_reg_A[model_reg_name_A]

    for model_reg_name_B in models_reg_B:
        if model_reg_name_A != model_reg_name_B:
            model_reg_tst_B = models_reg_B[model_reg_name_B]
            print(model_reg_name_A, model_reg_name_B)

            f, p = combined_ftest_5x2cv(
                estimator1=model_reg_tst_A,
                estimator2=model_reg_tst_B,
                X=X_reg,
                y=Y_reg,
                scoring='neg_mean_squared_error',
                random_seed=42,
            )

            model_reg_A.append(model_reg_name_A)
            model_reg_B.append(model_reg_name_B)
            f_valor_teste_reg.append(f)
            p_valor_teste_reg.append(p)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
# Recorded output of a previous run: execution time 288.52 min.
# (Kept as a comment; as bare text it is a syntax error.)

Resultado = pd.DataFrame({
    'Modelo A': model_reg_A,
    'Modelo B': model_reg_B,
    'Valor p': p_valor_teste_reg,
    'Valor f': f_valor_teste_reg,
})

# Create a pandas Excel writer using XlsxWriter as the engine.
resultado_combined_ftest_regressao = pd.ExcelWriter(
    'V5/resultado_combined_ftest_rmse_regressao.xlsx',
    engine='xlsxwriter',
)
from mlxtend.evaluate import combined_ftest_5x2cv from sklearn.metrics import r2_score import time start = time.time() for model_reg_name_A in models_reg_A: model_reg_tst_A = models_reg_A[model_reg_name_A] for model_reg_name_B in models_reg_B: if model_reg_name_A != model_reg_name_B: model_reg_tst_B = models_reg_B[model_reg_name_B] print(model_reg_name_A, model_reg_name_B) f, p = combined_ftest_5x2cv(estimator1=model_reg_tst_A, estimator2=model_reg_tst_B, X=X_reg, y=Y_reg, scoring='r2', random_seed=42) model_reg_A.append(model_reg_name_A) model_reg_B.append(model_reg_name_B) f_valor_teste_reg.append(f) p_valor_teste_reg.append(p) end = time.time() print("Tempo de Execução: {:.2f} min".format((end - start) / 60)) Resultado = pd.DataFrame({ 'Modelo A': model_reg_A, 'Modelo B': model_reg_B, 'Valor p': p_valor_teste_reg, 'Valor f': f_valor_teste_reg
# Recorded output of the preceding comparison: p-value: 0.183

# 5X2CV COMBINED F TEST (7,8)
from mlxtend.evaluate import combined_ftest_5x2cv
from sklearn.metrics import make_scorer
from sklearn.metrics import matthews_corrcoef
import time

# Compare classifier_lgbm_7 with classifier_lgbm_8 using the 5x2cv combined
# F-test, scored with the Matthews correlation coefficient, and time the run.
start = time.time()

f, p = combined_ftest_5x2cv(
    estimator1=classifier_lgbm_7,
    estimator2=classifier_lgbm_8,
    X=X,
    y=Y,
    scoring=make_scorer(matthews_corrcoef),
    random_seed=42,
)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
# Recorded output of a previous run: execution time 42.50 min.

print('F statistic: %.3f' % f)
print('p value: %.3f' % p)
# Recorded output of a previous run:
#   F statistic: 0.656
#   p value: 0.734

#5X2CV COMBINED F TEST (8,28)