def test_scoring():
    """Check ``paired_ttest_kfold_cv`` with explicit ``scoring`` arguments.

    Fits a logistic regression and a decision tree on a 75/25 hold-out
    split of the iris data, checks their hold-out accuracy, and then
    compares the two estimators with the k-fold paired t-test using the
    'accuracy' and 'f1_macro' scorers.
    """
    X, y = iris_data()
    # Pin the solver and multi-class strategy instead of relying on the
    # library defaults, which differ between scikit-learn releases. This is
    # the configuration the other tests in this file use when asserting the
    # same hold-out scores and accuracy-based t/p values.
    clf1 = LogisticRegression(random_state=1,
                              solver='liblinear',
                              multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    # Report the observed value on failure, like the t/p assertions below.
    assert round(score1, 2) == 0.97, score1
    assert round(score2, 2) == 0.95, score2

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X,
                                 y=y,
                                 scoring='accuracy',
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X,
                                 y=y,
                                 scoring='f1_macro',
                                 random_seed=1)

    assert round(t, 3) == -1.872, t
    assert round(p, 3) == 0.094, p
def test_scoring():
    """Exercise the ``scoring`` argument of ``paired_ttest_kfold_cv``.

    A liblinear one-vs-rest logistic regression and a decision tree are
    first checked on a 50/50 hold-out split of the iris data, then compared
    with the paired k-fold t-test under the 'accuracy' and 'recall_micro'
    scorers, which are asserted to give the same t and p values.
    """
    features, targets = iris_data()
    logreg = LogisticRegression(random_state=1,
                                solver='liblinear',
                                multi_class='ovr')
    tree = DecisionTreeClassifier(random_state=1)

    split = train_test_split(features, targets,
                             test_size=0.5, random_state=123)
    train_X, test_X, train_y, test_y = split

    logreg.fit(train_X, train_y)
    logreg_acc = logreg.score(test_X, test_y)
    tree.fit(train_X, train_y)
    tree_acc = tree.score(test_X, test_y)

    assert round(logreg_acc, 2) == 0.96, round(logreg_acc, 2)
    assert round(tree_acc, 2) == 0.91, round(tree_acc, 2)

    # Both scorers are asserted against the same statistics.
    for scorer in ('accuracy', 'recall_micro'):
        t, p = paired_ttest_kfold_cv(estimator1=logreg,
                                     estimator2=tree,
                                     X=features, y=targets,
                                     scoring=scorer,
                                     random_seed=1)

        assert round(t, 3) == -1.861, t
        assert round(p, 3) == 0.096, p
def test_classifier_defaults():
    """Run ``paired_ttest_kfold_cv`` on classifiers without a ``scoring`` argument.

    Compares a liblinear one-vs-rest logistic regression first against an
    unrestricted decision tree and then against a depth-1 tree, checking
    the hold-out accuracies and the resulting t statistics and p values.
    """
    data, labels = iris_data()
    linear_clf = LogisticRegression(random_state=1,
                                    multi_class='ovr',
                                    solver='liblinear')
    tree_clf = DecisionTreeClassifier(random_state=1)

    (train_data, test_data,
     train_labels, test_labels) = train_test_split(data, labels,
                                                   test_size=0.25,
                                                   random_state=123)

    linear_clf.fit(train_data, train_labels)
    linear_acc = linear_clf.score(test_data, test_labels)
    tree_clf.fit(train_data, train_labels)
    tree_acc = tree_clf.score(test_data, test_labels)

    assert round(linear_acc, 2) == 0.97
    assert round(tree_acc, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=linear_clf,
                                 estimator2=tree_clf,
                                 X=data,
                                 y=labels,
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # Swap in a depth-1 tree (decision stump) as the second estimator.
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump.fit(train_data, train_labels)
    stump_acc = stump.score(test_data, test_labels)

    assert round(stump_acc, 2) == 0.63

    t, p = paired_ttest_kfold_cv(estimator1=linear_clf,
                                 estimator2=stump,
                                 X=data,
                                 y=labels,
                                 random_seed=1)

    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p
    def statistical_significance_tests(self):
        """Print a k-fold cross-validated paired t-test for the two models.

        Calls ``paired_ttest_kfold_cv`` with ``self.svm_model`` as the first
        estimator and ``self.rf_model`` as the second, on ``self.pre.x`` and
        ``self.pre.y`` with ``random_seed=42``, then prints the t statistic
        and p value with three decimals.
        """
        separator = "============================================================"
        for header_line in (separator, separator,
                            "K-fold cross-validated paired t-test:"):
            print(header_line)
        print()

        t_stat, p_value = paired_ttest_kfold_cv(
            estimator1=self.svm_model,
            estimator2=self.rf_model,
            X=self.pre.x,
            y=self.pre.y,
            random_seed=42,
        )
        print('t statistic: %.3f' % t_stat)
        print('p value: %.3f' % p_value)
def test_classifier_defaults():
    """Check the default-scoring paired k-fold t-test on two classifier pairs.

    The logistic regression is compared with a full decision tree and then
    with a ``max_depth=1`` tree; hold-out accuracies on a 75/25 iris split
    and the rounded t / p values are asserted for each pair.
    """
    X, y = iris_data()
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_accuracy(model):
        # Fit on the training part and score on the held-out part.
        model.fit(X_tr, y_tr)
        return model.score(X_te, y_te)

    logistic = LogisticRegression(random_state=1,
                                  multi_class='ovr',
                                  solver='liblinear')
    full_tree = DecisionTreeClassifier(random_state=1)

    acc_logistic = holdout_accuracy(logistic)
    acc_full_tree = holdout_accuracy(full_tree)

    assert round(acc_logistic, 2) == 0.97
    assert round(acc_full_tree, 2) == 0.95

    t, p = paired_ttest_kfold_cv(
        estimator1=logistic, estimator2=full_tree,
        X=X, y=y, random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # Repeat with the tree restricted to a single split.
    shallow_tree = DecisionTreeClassifier(max_depth=1, random_state=1)
    acc_shallow_tree = holdout_accuracy(shallow_tree)

    assert round(acc_shallow_tree, 2) == 0.63

    t, p = paired_ttest_kfold_cv(
        estimator1=logistic, estimator2=shallow_tree,
        X=X, y=y, random_seed=1)

    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p
def test_scoring():
    """Verify ``paired_ttest_kfold_cv`` when a ``scoring`` name is supplied.

    Hold-out accuracies of a logistic regression and a decision tree are
    checked on a 50/50 iris split; the paired k-fold t-test is then run
    once with 'accuracy' and once with 'recall_micro'.
    """
    X, y = iris_data()
    lr_model = LogisticRegression(
        random_state=1, solver='liblinear', multi_class='ovr')
    dt_model = DecisionTreeClassifier(random_state=1)

    X_fit, X_eval, y_fit, y_eval = train_test_split(
        X, y, test_size=0.5, random_state=123)

    lr_model.fit(X_fit, y_fit)
    lr_score = lr_model.score(X_eval, y_eval)
    dt_model.fit(X_fit, y_fit)
    dt_score = dt_model.score(X_eval, y_eval)

    assert round(lr_score, 2) == 0.96, round(lr_score, 2)
    assert round(dt_score, 2) == 0.91, round(dt_score, 2)

    # Arguments shared by both t-test invocations; only the scorer varies.
    common_args = dict(estimator1=lr_model,
                       estimator2=dt_model,
                       X=X,
                       y=y,
                       random_seed=1)

    t, p = paired_ttest_kfold_cv(scoring='accuracy', **common_args)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(scoring='recall_micro', **common_args)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p
def test_regressor():
    """Run the paired k-fold t-test on two regressors.

    Lasso and Ridge are fitted on a 75/25 split of the Boston housing data,
    their hold-out ``score`` values are checked, and the two regressors are
    then compared with ``paired_ttest_kfold_cv`` using its default scoring.
    """
    features, prices = boston_housing_data()
    lasso = Lasso(random_state=1)
    ridge = Ridge(random_state=1)

    parts = train_test_split(features, prices,
                             test_size=0.25, random_state=123)
    train_X, test_X, train_y, test_y = parts

    lasso.fit(train_X, train_y)
    lasso_score = lasso.score(test_X, test_y)
    ridge.fit(train_X, train_y)
    ridge_score = ridge.score(test_X, test_y)

    assert round(lasso_score, 2) == 0.66, lasso_score
    assert round(ridge_score, 2) == 0.68, ridge_score

    t_stat, p_value = paired_ttest_kfold_cv(estimator1=lasso,
                                            estimator2=ridge,
                                            X=features, y=prices,
                                            random_seed=1)

    assert round(t_stat, 3) == -0.549, t_stat
    assert round(p_value, 3) == 0.596, p_value
def test_regressor():
    """Check ``paired_ttest_kfold_cv`` with regression estimators.

    Asserts the hold-out scores of Lasso and Ridge on the Boston housing
    data (25% held out) and the rounded t statistic and p value returned
    by the paired k-fold t-test for that pair.
    """
    X, y = boston_housing_data()
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_score(regressor):
        # Fit on the training part and score on the held-out part.
        regressor.fit(X_tr, y_tr)
        return regressor.score(X_te, y_te)

    lasso_reg = Lasso(random_state=1)
    ridge_reg = Ridge(random_state=1)

    lasso_holdout = holdout_score(lasso_reg)
    ridge_holdout = holdout_score(ridge_reg)

    assert round(lasso_holdout, 2) == 0.66, lasso_holdout
    assert round(ridge_holdout, 2) == 0.68, ridge_holdout

    t, p = paired_ttest_kfold_cv(
        estimator1=lasso_reg,
        estimator2=ridge_reg,
        X=X,
        y=y,
        random_seed=1,
    )

    assert round(t, 3) == -0.549, t
    assert round(p, 3) == 0.596, p
Exemple #9
0
# Compare Gaussian naive Bayes and logistic regression on a 10% sample of
# the job table with the k-fold cross-validated paired t-test.
temp_ = t10[['State', 'ReqMem', 'Timelimit', 'role']]

newdf_ = temp_

fdf_ = newdf_
# classification for State = 1, failed

xt_ = fdf_[['State', 'ReqMem', 'Timelimit', 'role']]
# DataFrame.fillna returns a new frame; the result must be rebound,
# otherwise the missing values are still present downstream.
xt_ = xt_.fillna(0)

a_ = xt_.sample(frac=0.1)
xt_ = preprocessing.StandardScaler().fit_transform(a_)

# Columns 1-2 (ReqMem, Timelimit) are the features; column 0 (State) is
# the target.
x_ = xt_[:, 1:3]
y_ = xt_[:, 0]

# NOTE(review): the target column went through StandardScaler before this
# integer cast, so the resulting labels are truncated standardized values -
# confirm this is the intended encoding of State.
y_ = y_.astype('int')

clf2 = LogisticRegression()
clf1 = GaussianNB()

# Use the arrays prepared above (x_, y_); the original call passed `x`/`y`,
# which this cell never defines.
t, p = paired_ttest_kfold_cv(estimator1=clf1,
                             estimator2=clf2,
                             X=x_,
                             y=y_,
                             random_seed=1)

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: -3.605
p value: 0.015

# K-fold cross-validated paired t-test between models 7 and 8, scored with
# the Matthews correlation coefficient; wall-clock duration is reported in
# minutes.
from mlxtend.evaluate import paired_ttest_kfold_cv

import time

start = time.time()

t, p = paired_ttest_kfold_cv(
    estimator1=classifier_lgbm_7,
    estimator2=classifier_lgbm_8,
    X=X,
    y=Y,
    scoring=make_scorer(matthews_corrcoef),
    random_seed=42,
)

end = time.time()
elapsed_minutes = (end - start) / 60
print("Tempo de Execução: {:.2f} min".format(elapsed_minutes))

Tempo de Execução: 246.89 min

# Report the t statistic and p value held in `t` and `p`, each formatted
# to three decimal places.
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: 0.238
p value: 0.817

#K-FOLD CROSS-VALIDATED PAIRED T TEST (8,28)
Exemple #11
0
    # Plot ROC curves from the `fpr` / `tpr` / `roc_auc` mappings: the
    # "micro" and "macro" averages as dotted lines, then one curve per entry
    # of `classes`, plus a dashed diagonal reference line.
    # NOTE(review): the enclosing function header is outside this excerpt.
    lw = 2
    plt.figure()
    # The `''` after each label literal is an empty string joined to it by
    # implicit concatenation; it does not change the label text.
    plt.plot(fpr["micro"], tpr["micro"],label='micro-average ROC curve (area = {0:0.2f})'''.format(roc_auc["micro"]),color='deeppink' , linestyle=':', linewidth=4)
    plt.plot(fpr["macro"], tpr["macro"],label='macro-average ROC curve (area = {0:0.2f})'''.format(roc_auc["macro"]),color='navy', linestyle=':', linewidth=4)
    # `cycle` repeats the palette, so zip stops after len(classes) curves.
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue','red','green','yellow'])
    for i, color in zip(range(len(classes)), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,label='ROC curve of class {0} (area = {1:0.2f})'''.format(classes[i], roc_auc[i]))

    # Dashed black diagonal from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()

# K-fold cross-validated paired t-test between the NB and DT estimators;
# prints the resulting t statistic and p value with three decimals.
t, p = paired_ttest_kfold_cv(
    estimator1=NB,
    estimator2=DT,
    X=X,
    y=y,
    random_seed=1,
)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)