Example #1
# Imports assumed by this test (not shown in the original snippet):
from mlxtend.data import iris_data
from mlxtend.evaluate import combined_ftest_5x2cv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              multi_class='ovr',
                              solver='liblinear')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X,
                                y=y,
                                random_seed=1)

    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X,
                                y=y,
                                random_seed=1)

    assert round(f, 3) == 34.934, f
    assert round(p, 3) == 0.001, p
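
Interpretation sketch (not part of the original test): combined_ftest_5x2cv returns the F statistic and the p-value of the combined 5x2cv F test, so the result can be checked against a significance level in the usual way.

# Hedged sketch, assuming clf1, clf2, X, y as defined in the test above.
f, p = combined_ftest_5x2cv(estimator1=clf1, estimator2=clf2,
                            X=X, y=y, random_seed=1)
alpha = 0.05
if p < alpha:
    print("Reject H0: the two classifiers perform significantly differently")
else:
    print("Fail to reject H0: no significant performance difference detected")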
Example #2
def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              multi_class='ovr',
                              solver='liblinear')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X, y=y,
                                random_seed=1)

    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X, y=y,
                                random_seed=1)

    assert round(f, 3) == 34.934, f
    assert round(p, 3) == 0.001, p
Example #3
# Imports assumed by this test (not shown in the original snippet); the exact
# Version / sklearn_version imports used by the original test suite may differ.
from packaging.version import Version
from sklearn import __version__ as sklearn_version
from mlxtend.data import iris_data
from mlxtend.evaluate import combined_ftest_5x2cv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              solver='liblinear',
                              multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X,
                                y=y,
                                scoring='accuracy',
                                random_seed=1)

    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X,
                                y=y,
                                scoring='f1_macro',
                                random_seed=1)

    if Version(sklearn_version) < Version('0.20'):
        assert round(f, 3) == -1.510, f
        assert round(p, 3) == 0.191, p
    else:
        assert round(f, 3) == 1.046, f
        assert round(p, 3) == 0.513, p
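
Side note (hedged): the scoring argument accepts scikit-learn scorer names such as the 'accuracy' and 'f1_macro' strings used above; in recent scikit-learn versions the available names can be listed with get_scorer_names.

# Hedged sketch; get_scorer_names is available in scikit-learn >= 1.0.
from sklearn.metrics import get_scorer_names
print(sorted(get_scorer_names()))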
Example #4
File: main.py Project: mles2/UM
def display_f_test(inputs_from_feature_selection, dataset):
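    # NeuralNet, Knn, and Svm are project-specific wrappers (assumed); their .clf
    # attribute exposes the underlying scikit-learn estimator. Each
    # combined_ftest_5x2cv call below returns an (F statistic, p-value) tuple.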
    print("ftest results: (f, p) =")
    print(
        " NN, KNN: ",
        combined_ftest_5x2cv(NeuralNet(10000).clf,
                             Knn(2).clf,
                             inputs_from_feature_selection,
                             dataset.target,
                             random_seed=1))
    print(
        " NN, SVM: ",
        combined_ftest_5x2cv(NeuralNet(10000).clf,
                             Svm().clf,
                             inputs_from_feature_selection,
                             dataset.target,
                             random_seed=1))
    print(
        " KNN, SVM: ",
        combined_ftest_5x2cv(Knn(2).clf,
                             Svm().clf,
                             inputs_from_feature_selection,
                             dataset.target,
                             random_seed=1))
Example #5
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1, solver='liblinear',
                              multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X, y=y,
                                scoring='accuracy',
                                random_seed=1)

    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    f, p = combined_ftest_5x2cv(estimator1=clf1,
                                estimator2=clf2,
                                X=X, y=y,
                                scoring='f1_macro',
                                random_seed=1)

    if Version(sklearn_version) < Version('0.20'):
        assert round(f, 3) == -1.510, f
        assert round(p, 3) == 0.191, p
    else:
        assert round(f, 3) == 1.046, f
        assert round(p, 3) == 0.513, p
Example #6
# Imports assumed by this test (not shown in the original snippet):
from mlxtend.data import boston_housing_data
from mlxtend.evaluate import combined_ftest_5x2cv
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split


def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    f, p = combined_ftest_5x2cv(estimator1=reg1,
                                estimator2=reg2,
                                X=X, y=y,
                                random_seed=1)

    assert round(f, 3) == 3.211, f
    assert round(p, 3) == 0.105, p
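
Note (hedged, based on the mlxtend documentation): with scoring left at its default, combined_ftest_5x2cv uses accuracy for scikit-learn classifiers and R² for regressors, so the call above should be equivalent to passing scoring='r2' explicitly.

# Hedged sketch: explicit R² scoring, assuming reg1, reg2, X, y from the test above.
f, p = combined_ftest_5x2cv(estimator1=reg1, estimator2=reg2,
                            X=X, y=y, scoring='r2', random_seed=1)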
Example #7
def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    f, p = combined_ftest_5x2cv(estimator1=reg1,
                                estimator2=reg2,
                                X=X,
                                y=y,
                                random_seed=1)

    assert round(f, 3) == 3.211, f
    assert round(p, 3) == 0.105, p
Example #8

# models_reg_A / models_reg_B (dictionaries mapping model names to regressors),
# X_reg, Y_reg, the pandas import, and the result lists model_reg_A, model_reg_B,
# f_valor_teste_reg are assumed to be defined earlier in the source notebook.
p_valor_teste_reg = []

from mlxtend.evaluate import combined_ftest_5x2cv

import time
start = time.time()

for model_reg_name_A in models_reg_A:
    model_reg_tst_A = models_reg_A[model_reg_name_A]
    for model_reg_name_B in models_reg_B:
        if model_reg_name_A != model_reg_name_B:
            model_reg_tst_B = models_reg_B[model_reg_name_B]
            print(model_reg_name_A, model_reg_name_B)
            # custom_r2_by_correlation is a scorer defined elsewhere in the source project.
            f, p = combined_ftest_5x2cv(estimator1=model_reg_tst_A,
                                        estimator2=model_reg_tst_B,
                                        X=X_reg,
                                        y=Y_reg,
                                        scoring=custom_r2_by_correlation,
                                        random_seed=42)
            model_reg_A.append(model_reg_name_A)
            model_reg_B.append(model_reg_name_B)
            f_valor_teste_reg.append(f)
            p_valor_teste_reg.append(p)
        
end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
Tempo de Execução: 826.12 min
        
Resultado = pd.DataFrame({'Modelo A': model_reg_A, 'Modelo B': model_reg_B, 'Valor p': p_valor_teste_reg, 'Valor f': f_valor_teste_reg})

# Create a Pandas Excel Writer Using XlsxWriter as the Engine.
resultado_combined_ftest_regressao = pd.ExcelWriter('V5/resultado_combined_ftest_r2_score_xgboost_regressor.xlsx', engine = 'xlsxwriter')
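
The snippet stops right after creating the Excel writer; a plausible continuation (a sketch only, with a hypothetical sheet name) would write the results frame and close the writer:

# Hedged sketch, not shown in the original; 'resultados' is a hypothetical sheet name.
Resultado.to_excel(resultado_combined_ftest_regressao,
                   sheet_name='resultados', index=False)
resultado_combined_ftest_regressao.close()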
Example #9
# The combined_ftest_5x2cv and pandas imports, the models_A / models_B dictionaries,
# X, Y, and the result lists model_A, model_B, f_valor_teste, p_valor_teste are
# assumed to be defined earlier in the source notebook.
from sklearn.metrics import make_scorer

from sklearn.metrics import matthews_corrcoef

import time
start = time.time()

for model_name_A in models_A:
    model_tst_A = models_A[model_name_A]
    for model_name_B in models_B:
        if model_name_A != model_name_B:
            model_tst_B = models_B[model_name_B]
            print(model_name_A, model_name_B)
            f, p = combined_ftest_5x2cv(estimator1=model_tst_A,
                                        estimator2=model_tst_B,
                                        X=X,
                                        y=Y,
                                        scoring=make_scorer(matthews_corrcoef),
                                        random_seed=42)
            model_A.append(model_name_A)
            model_B.append(model_name_B)
            f_valor_teste.append(f)
            p_valor_teste.append(p)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start) / 60))

#Tempo de Execução: 751.73 min

Resultado = pd.DataFrame({
    'Modelo A': model_A,
    'Modelo B': model_B,
    'Valor p': p_valor_teste,
    'Valor f': f_valor_teste
})
Example #10
from mlxtend.evaluate import combined_ftest_5x2cv

from sklearn.metrics import mean_squared_error

import time
start = time.time()
      
for model_reg_name_A in models_reg_A:
    model_reg_tst_A = models_reg_A[model_reg_name_A]
    for model_reg_name_B in models_reg_B:
        if model_reg_name_A != model_reg_name_B:
            model_reg_tst_B = models_reg_B[model_reg_name_B]
            print(model_reg_name_A, model_reg_name_B)
            f, p = combined_ftest_5x2cv(estimator1=model_reg_tst_A,
                                        estimator2=model_reg_tst_B,
                                        X=X_reg,
                                        y=Y_reg,
                                        scoring='neg_mean_squared_error',
                                        random_seed=42)
            model_reg_A.append(model_reg_name_A)
            model_reg_B.append(model_reg_name_B)
            f_valor_teste_reg.append(f)
            p_valor_teste_reg.append(p)
        
end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
Tempo de Execução: 288.52 min 
        
Resultado = pd.DataFrame({'Modelo A': model_reg_A, 'Modelo B': model_reg_B, 'Valor p': p_valor_teste_reg, 'Valor f': f_valor_teste_reg})

# Create a Pandas Excel Writer Using XlsxWriter as the Engine.
resultado_combined_ftest_regressao = pd.ExcelWriter('V5/resultado_combined_ftest_rmse_regressao.xlsx', engine = 'xlsxwriter')
from mlxtend.evaluate import combined_ftest_5x2cv

from sklearn.metrics import r2_score

import time
start = time.time()

for model_reg_name_A in models_reg_A:
    model_reg_tst_A = models_reg_A[model_reg_name_A]
    for model_reg_name_B in models_reg_B:
        if model_reg_name_A != model_reg_name_B:
            model_reg_tst_B = models_reg_B[model_reg_name_B]
            print(model_reg_name_A, model_reg_name_B)
            f, p = combined_ftest_5x2cv(estimator1=model_reg_tst_A,
                                        estimator2=model_reg_tst_B,
                                        X=X_reg,
                                        y=Y_reg,
                                        scoring='r2',
                                        random_seed=42)
            model_reg_A.append(model_reg_name_A)
            model_reg_B.append(model_reg_name_B)
            f_valor_teste_reg.append(f)
            p_valor_teste_reg.append(p)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start) / 60))

Resultado = pd.DataFrame({
    'Modelo A': model_reg_A,
    'Modelo B': model_reg_B,
    'Valor p': p_valor_teste_reg,
    'Valor f': f_valor_teste_reg
})

#5X2CV COMBINED F TEST (7,8)

from mlxtend.evaluate import combined_ftest_5x2cv

from sklearn.metrics import make_scorer

from sklearn.metrics import matthews_corrcoef

import time
start = time.time()

f, p = combined_ftest_5x2cv(estimator1=classifier_lgbm_7,
                            estimator2=classifier_lgbm_8,
                            X=X,
                            y=Y,
                            scoring=make_scorer(matthews_corrcoef),
                            random_seed=42)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))

Tempo de Execução: 42.50 min

print('F statistic: %.3f' % f)
print('p value: %.3f' % p)

# F statistic: 0.656
# p value: 0.734

#5X2CV COMBINED F TEST (8,28)