Example #1
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              scoring='accuracy',
                              random_seed=1)

    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              scoring='f1_macro',
                              random_seed=1)

    assert round(t, 3) == -1.510, t
    assert round(p, 3) == 0.191, p
def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              multi_class='ovr',
                              solver='liblinear')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X,
                              y=y,
                              random_seed=1)

    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X,
                              y=y,
                              random_seed=1)

    assert round(t, 3) == 5.386, t
    assert round(p, 3) == 0.003, p
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              solver='liblinear',
                              multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X,
                              y=y,
                              scoring='accuracy',
                              random_seed=1)

    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X,
                              y=y,
                              scoring='f1_macro',
                              random_seed=1)

    if Version(sklearn_version) < Version('0.20'):
        assert round(t, 3) == -1.510, t
        assert round(p, 3) == 0.191, p
    else:
        assert round(t, 3) == -1.506, t
        assert round(p, 3) == 0.192, p
Example #4
def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1,
                              multi_class='ovr',
                              solver='liblinear')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              random_seed=1)

    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              random_seed=1)

    assert round(t, 3) == 5.386, t
    assert round(p, 3) == 0.003, p
Example #5
def hypothesis_testing_between_two_models(estimator1,
                                          estimator2,
                                          X,
                                          y,
                                          scoring='explained_variance',
                                          random_seed=1):
    t, p = paired_ttest_5x2cv(estimator1=estimator1,
                              estimator2=estimator2,
                              X=X,
                              y=y,
                              scoring=scoring,
                              random_seed=random_seed)
    return t, p
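
A minimal usage sketch for the wrapper above; the estimators and the synthetic data are assumptions for illustration, not part of the original example:

# Hypothetical usage of hypothesis_testing_between_two_models; Lasso/Ridge and
# the synthetic regression data are placeholders, not from the original source.
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso, Ridge

X_demo, y_demo = make_regression(n_samples=200, n_features=5,
                                 noise=10.0, random_state=0)
t, p = hypothesis_testing_between_two_models(Lasso(random_state=1),
                                             Ridge(random_state=1),
                                             X_demo, y_demo,
                                             scoring='r2',
                                             random_seed=1)
print('t statistic: %.3f, p value: %.3f' % (t, p))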
Example #6
def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1, solver='liblinear',
                              multi_class='ovr')
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              scoring='accuracy',
                              random_seed=1)

    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    t, p = paired_ttest_5x2cv(estimator1=clf1,
                              estimator2=clf2,
                              X=X, y=y,
                              scoring='f1_macro',
                              random_seed=1)

    if Version(sklearn_version) < Version('0.20'):
        assert round(t, 3) == -1.510, t
        assert round(p, 3) == 0.191, p
    else:
        assert round(t, 3) == -1.506, t
        assert round(p, 3) == 0.192, p
Example #7
def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    t, p = paired_ttest_5x2cv(estimator1=reg1,
                              estimator2=reg2,
                              X=X, y=y,
                              random_seed=1)

    assert round(t, 3) == -0.599, t
    assert round(p, 3) == 0.575, p
#Naive Bayes
model_nb = BernoulliNB()
model_nb.fit(X_train_smote, y_train_smote)
result_two = model_nb.score(X_test, y_test)
print("Naive Bayes Accuracy = %0.2f%%"% (result_two*100))

#Decision Tree
model_tree = DecisionTreeClassifier(min_samples_split=5)
model_tree.fit(X_train_smote, y_train_smote)
result_three = model_tree.score(X_test, y_test)
print("Decision Tree Accuracy = %0.2f%%" % (result_three*100))

#Gradient Boosting
model_gb = GradientBoostingClassifier()
model_gb.fit(X_train_smote, y_train_smote)
result_four = model_gb.score(X_test, y_test)
print("Gradient Boosting Accuracy = %0.2f%%" % (result_four*100))

#To determine the final model for this problem, I will use a 5x2 cross-validation paired t-test
#This test checks whether there is a statistically significant difference between two classifiers
#Citation: http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_5x2cv/
print('\nResults of 5x2 Cross Validation Paired T-Test: ')
t, p = paired_ttest_5x2cv(estimator1=model_rf, estimator2=model_nb, X=X_train_smote, y=y_train_smote, random_seed=1)
print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

alpha = 0.05
if p > alpha:
    print('5x2 CV: The null hypothesis is not rejected. There is no statistical difference between classifiers.')
else:
    print('5x2 CV: The null hypothesis is rejected. There is a statistical difference between classifiers.')
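
The snippet above only compares the random forest against the naive Bayes model; a minimal sketch of how the same 5x2cv test could be looped over every model pair is shown below (it assumes model_rf was fit earlier in the original notebook, which is not shown here):

# Hypothetical extension: run the 5x2cv paired t-test for every model pair.
# model_rf is assumed to be the random forest fit earlier in the notebook.
from itertools import combinations

candidates = {'Random Forest': model_rf, 'Naive Bayes': model_nb,
              'Decision Tree': model_tree, 'Gradient Boosting': model_gb}
for (name_a, est_a), (name_b, est_b) in combinations(candidates.items(), 2):
    t, p = paired_ttest_5x2cv(estimator1=est_a, estimator2=est_b,
                              X=X_train_smote, y=y_train_smote, random_seed=1)
    verdict = 'different' if p <= alpha else 'not different'
    print('%s vs %s: t=%.3f, p=%.3f -> %s' % (name_a, name_b, t, p, verdict))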
Example #10
    def compareModels_ttest(self, models=[], X=None, y=None, a=0.05):
        comparison_rows = []
        headers = [
            'model pairs', 'model1', 'mean1', 'std1', 'model2', 'mean2',
            'std2', 't-stat', 'p-val', 'sig/notsig'
        ]
        print(*headers)
        models = list(self.models_dict.keys())
        assert len(
            models
        ) >= 2, 'there must be at least 2 models to run ttest statistical comparison'
        betterClassifier = models[0]
        betterCV = RepeatedStratifiedKFold(n_splits=10,
                                           n_repeats=2,
                                           random_state=self.random_state)
        betterScore = cross_val_score(
            self.models_dict[betterClassifier].getBuiltModel(),
            X,
            y,
            scoring='accuracy',
            cv=betterCV)
        betterMeanScore = np.mean(betterScore)
        betterStdDev = np.std(betterScore)
        comparison_rows = []
        for model in models[1:]:
            row = [f'{betterClassifier} v {model}']
            cv = RepeatedStratifiedKFold(n_splits=10,
                                         n_repeats=2,
                                         random_state=self.random_state)
            score = cross_val_score(self.models_dict[model].getBuiltModel(),
                                    X,
                                    y,
                                    scoring='accuracy',
                                    cv=cv)
            meanScore = np.mean(score)
            stdDev = np.std(score)
            prevBest = betterClassifier
            if meanScore > betterMeanScore:
                # this is better classifier
                row.extend([model, f'{meanScore:.5f}*', f'{stdDev:.5f}'])
                row.extend([
                    betterClassifier, f'{betterMeanScore:.5f}',
                    f'{betterStdDev:.5f}'
                ])
                betterClassifier = model
                betterMeanScore = meanScore
                betterStdDev = stdDev
            else:
                row.extend([
                    betterClassifier, f'{betterMeanScore:.5f}*',
                    f'{betterStdDev:.5f}'
                ])
                row.extend([model, f'{meanScore:.5f}', f'{stdDev:.5f}'])

            # compare the previous best against the current model; using the
            # pre-swap name avoids testing a classifier against itself
            t, p = paired_ttest_5x2cv(
                estimator1=self.models_dict[prevBest].getBuiltModel(),
                estimator2=self.models_dict[model].getBuiltModel(),
                X=X,
                y=y,
                scoring='accuracy')
            row.extend([f'{t:.3f}', f'{p:.3f}'])
            if p <= a:
                row.append('sig')
            else:
                row.append('notsig')
            comparison_rows.append(row)
            print(*row)
        print(tabulate(comparison_rows, headers=headers))
Example #11
    def compareModels_2x5cv(self, models=[], X=None, y=None, a=0.05):
        comparison_rows = []
        headers = [
            'models', 'model1', 'mean1', 'std1', 'model2', 'mean2', 'std2',
            'sig/notsig'
        ]

        comparisons_ran = dict()

        for model1 in self.models_dict.keys():
            if not model1 in comparisons_ran:
                comparisons_ran[model1] = []
            for model2 in self.models_dict.keys():
                if not model2 in comparisons_ran:
                    comparisons_ran[model2] = []
                if model1 != model2 and (model1
                                         not in comparisons_ran[model2]) and (
                                             model2
                                             not in comparisons_ran[model1]):
                    row = ['{} & {}'.format(model1, model2)]

                    cv1 = RepeatedStratifiedKFold(
                        n_splits=10,
                        n_repeats=2,
                        random_state=self.random_state)
                    scores1 = cross_val_score(
                        self.models_dict[model1].getBuiltModel(),
                        X,
                        y,
                        scoring='accuracy',
                        cv=cv1)

                    cv2 = RepeatedStratifiedKFold(
                        n_splits=10,
                        n_repeats=2,
                        random_state=self.random_state)
                    scores2 = cross_val_score(
                        self.models_dict[model2].getBuiltModel(),
                        X,
                        y,
                        scoring='accuracy',
                        cv=cv2)

                    meanScore1 = np.mean(scores1)
                    meanScore2 = np.mean(scores2)

                    # fill the row in the same order as the headers,
                    # marking the higher mean accuracy with '*'
                    mark1 = '*' if meanScore1 > meanScore2 else ''
                    mark2 = '*' if meanScore2 > meanScore1 else ''
                    row.extend([model1, f'{meanScore1:.5f}{mark1}',
                                f'{np.std(scores1):.5f}'])
                    row.extend([model2, f'{meanScore2:.5f}{mark2}',
                                f'{np.std(scores2):.5f}'])

                    t, p = paired_ttest_5x2cv(
                        estimator1=self.models_dict[model1].getBuiltModel(),
                        estimator2=self.models_dict[model2].getBuiltModel(),
                        X=X,
                        y=y,
                        scoring='accuracy')
                    if p <= a:
                        row.append('sig')
                    else:
                        row.append('notsig')
                    comparisons_ran[model1].append(model2)
                    comparisons_ran[model2].append(model1)
                    comparison_rows.append(row)

        print(tabulate(comparison_rows, headers=headers))
Example #12
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from mlxtend.evaluate import paired_ttest_5x2cv

iris = datasets.load_iris()

# Logistic Regression
regressionModel = LogisticRegression()

# KNN
knnModel = KNeighborsClassifier(n_neighbors=3)

# Calculate 5x2 paired t test
t, p = paired_ttest_5x2cv(estimator1=regressionModel,
                          estimator2=knnModel,
                          X=iris.data,
                          y=iris.target,
                          random_seed=1)

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)
print('statistic=%.3f, p-value=%.3f' % (t, p))
alpha = 0.05
if p > alpha:
    print('Same proportions of errors (fail to reject H0)')
else:
    print('Different proportions of errors (reject H0)')
Example #13
# No documentation on the CV splits, so paired_ttest_5x2cv probably shuffles the time-series data into non-time-series order
debug = False
for i in range(len(models)):
    for j in range(len(models)):
        if j <= i:
            continue
        else:
            # wrap models i, j in pipelines
            pipeline_i = Pipeline(steps=[('m', models[i])])
            pipeline_j = Pipeline(steps=[('m', models[j])])
            # show all warnings on fail on exception if debugging
            if debug:
                # Check hypothesis between two models
                t, p = paired_ttest_5x2cv(estimator1=pipeline_i,
                                          estimator2=pipeline_j,
                                          X=X,
                                          y=y,
                                          scoring='explained_variance',
                                          random_seed=1)
            else:
                try:
                    with catch_warnings():
                        filterwarnings("ignore")
                        # Check hypothesis between two models
                        t, p = paired_ttest_5x2cv(estimator1=pipeline_i,
                                                  estimator2=pipeline_j,
                                                  X=X,
                                                  y=y,
                                                  scoring='explained_variance',
                                                  random_seed=1)
                except Exception:
                    # ignore model pairs that fail to fit or score
                    error = None
print('F statistic: %.3f' % f)
print('p value: %.3f' % p)

F statistic: 10.407
p value: 0.009

#5X2CV PAIRED T TEST (7,8)
from mlxtend.evaluate import paired_ttest_5x2cv
from sklearn.metrics import make_scorer, matthews_corrcoef

import time
start = time.time()

t, p = paired_ttest_5x2cv(estimator1=classifier_lgbm_7,
                          estimator2=classifier_lgbm_8,
                          X=X,
                          y=Y,
                          scoring=make_scorer(matthews_corrcoef),
                          random_seed=42)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))

Execution time: 30.59 min

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

t statistic: 0.341
p value: 0.747

#5X2CV PAIRED T TEST (8,28)
Example #15
    def compareModels_tTest(self, X, y, exp_type, a=0.05):
        # Initialize list objects and set up the header row
        rows = []
        headers = [
            'model pairs', 'model1', 'mean1', 'std1', 'model2', 'mean2',
            'std2', 'sig/notsig'
        ]
        model_ids = list(self.models.keys())

        # Initialize bestModel and respective values to the first one we have
        bestModel = self.models[model_ids[0]]
        bestModel.trainCV(X,
                          y,
                          exp_type=exp_type,
                          nfolds=10,
                          nrepeats=2,
                          metrics='accuracy')
        bestScores = bestModel.getMetrics()
        bestAvg = bestScores['Accuracy']['avg']
        bestStd = bestScores['Accuracy']['std']

        for model_id in model_ids[1:]:
            # Set up the current model
            model = self.models[model_id]

            # Set the row's first element
            row = ['{} vs {}'.format(bestModel.getName(), model.getName())]

            # Train and get the results for the current model
            model.trainCV(X,
                          y,
                          exp_type=exp_type,
                          nfolds=10,
                          nrepeats=2,
                          metrics='accuracy')
            modelScores = model.getMetrics()
            modelAvg = modelScores['Accuracy']['avg']
            modelStd = modelScores['Accuracy']['std']

            # Compare the scores with the best one so far
            swap = False
            if modelAvg > bestAvg:
                # This is the best classifier so far

                # Add the previous best model's information to the row
                row.append(bestModel.getName())
                row.append('{:.4f}'.format(bestAvg))
                row.append('{:.4f}'.format(bestStd))

                # Add the current model information to the row
                row.append(model.getName() + '*')
                row.append('{:.4f}'.format(modelAvg))
                row.append('{:.4f}'.format(modelStd))

                # Set the best model as the current one
                swap = True
            else:
                # This model performed worse than the best we've seen so far

                # Add the previous best model's information to the row
                row.append(bestModel.getName() + '*')
                row.append('{:.4f}'.format(bestAvg))
                row.append('{:.4f}'.format(bestStd))

                # Add the current model information to the row
                row.append(model.getName())
                row.append('{:.4f}'.format(modelAvg))
                row.append('{:.4f}'.format(modelStd))

            # Determine whether the difference in performance is significant
            t, p = paired_ttest_5x2cv(estimator1=model.getBuiltModel(),
                                      estimator2=bestModel.getBuiltModel(),
                                      X=X,
                                      y=y,
                                      scoring='accuracy',
                                      random_seed=0)

            # Add the t, p values to the row
            #row.append('{:.3f}'.format(t))
            #row.append('{:.3f}'.format(p))

            # Add the significance determination to the row
            if p <= a:
                row.append('sig')
            else:
                row.append('notsig')

            # Add the completed row to the list of rows
            rows.append(row)

            if swap:
                bestModel = model
                bestAvg = modelAvg
                bestStd = modelStd

        # Print the table
        print(tabulate(rows, headers=headers))
Example #16
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
import matplotlib.pyplot as plt

disp = plot_precision_recall_curve(text_clf, X_test, y_test)
disp.ax_.set_title('2-class Precision-Recall curve: '
                   'AP={0:0.2f}'.format(average_precision))

# %% space for statistical testing
#5 x 2 cross validation

from mlxtend.evaluate import paired_ttest_5x2cv

t, p = paired_ttest_5x2cv(estimator1=text_clf,
                          estimator2=voting_clf,
                          X=X,
                          y=y,
                          random_seed=1)

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

#bootstrap

# configure bootstrap
n_iterations = 500
n_size = int(len(review_random_set) * 0.50)

from sklearn.utils import resample
# run bootstrap
stats = list()
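
The original example is truncated here; a minimal sketch of how such a bootstrap loop typically continues, assuming each iteration resamples the held-out test set used earlier in this example:

# Hypothetical continuation of the truncated bootstrap setup above; resampling
# the test set (rather than refitting on resampled training data) is an
# assumption, as the rest of the original code is not shown.
import numpy as np
from sklearn.metrics import accuracy_score

for i in range(n_iterations):
    X_bs, y_bs = resample(X_test, y_test, n_samples=n_size, random_state=i)
    stats.append(accuracy_score(y_bs, text_clf.predict(X_bs)))

# 95% confidence interval from the bootstrap distribution
lower, upper = np.percentile(stats, [2.5, 97.5])
print('Accuracy: %.3f (95%% CI %.3f - %.3f)' % (np.mean(stats), lower, upper))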