def test_scoring():
    """Check paired_ttest_5x2cv with 'accuracy' and 'f1_macro' scoring."""
    features, targets = iris_data()
    logreg = LogisticRegression(random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    # Hold-out sanity check of both estimators before the paired tests.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25, random_state=123)
    holdout = [model.fit(X_train, y_train).score(X_test, y_test)
               for model in (logreg, tree)]
    assert round(holdout[0], 2) == 0.97
    assert round(holdout[1], 2) == 0.95

    # (scoring name, expected t statistic, expected p value)
    expectations = (
        ('accuracy', -1.539, 0.184),
        ('f1_macro', -1.510, 0.191),
    )
    for metric, expected_t, expected_p in expectations:
        t, p = paired_ttest_5x2cv(estimator1=logreg, estimator2=tree,
                                  X=features, y=targets,
                                  scoring=metric, random_seed=1)
        assert round(t, 3) == expected_t, t
        assert round(p, 3) == expected_p, p
def test_gridsearch():
    """Grid-search a StackingCVClassifier and compare the mean CV scores."""
    np.random.seed(123)
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 meta_classifier=meta,
                                 use_probas=True,
                                 shuffle=False)

    search_space = {
        'meta_classifier__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [20, 200],
    }

    # Older scikit-learn releases get an explicit iid=False.
    grid_kwargs = dict(estimator=stack, param_grid=search_space, cv=5)
    if Version(sklearn_version) < '0.24.1':
        grid_kwargs['iid'] = False
    grid = GridSearchCV(**grid_kwargs)

    X, y = iris_data()
    grid.fit(X, y)

    mean_scores = [round(score, 2)
                   for score in grid.cv_results_['mean_test_score']]
    assert mean_scores == [0.96, 0.95, 0.96, 0.95]
def test_not_fitted():
    """An unfitted StackingClassifier must raise NotFittedError on prediction."""
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    sclf = StackingClassifier(classifiers=base_learners,
                              use_probas=True,
                              meta_classifier=meta)

    X, y = iris_data()

    expected_msg = ("This StackingClassifier instance is not fitted yet."
                    " Call 'fit' with appropriate arguments"
                    " before using this method.")

    # Each prediction entry point is exercised in turn on the unfitted model.
    for method_name in ('predict', 'predict_proba', 'predict_meta_features'):
        assert_raises(NotFittedError,
                      expected_msg,
                      getattr(sclf, method_name),
                      X)
def test_use_clones():
    """With use_clones=True the passed estimators stay unfitted; with False they are fitted."""
    np.random.seed(123)
    X, y = iris_data()

    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()

    cloning_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                         use_clones=True,
                                         meta_classifier=meta,
                                         shuffle=False)
    cloning_stack.fit(X, y)

    # The original forest object must not have been fitted by the stack.
    not_fitted_msg = ("This RandomForestClassifier instance is not fitted yet."
                      " Call 'fit' with appropriate arguments"
                      " before using this estimator.")
    assert_raises(exceptions.NotFittedError,
                  not_fitted_msg,
                  forest.predict,
                  X)

    in_place_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                          use_probas=True,
                                          use_clones=False,
                                          meta_classifier=meta,
                                          shuffle=False)
    in_place_stack.fit(X, y)

    # Now the very same forest object is expected to predict without error.
    forest.predict(X)
def test_iris_data_uci():
    """iris_data() must match the raw file at DATA_PATH (features and labels)."""
    raw = np.genfromtxt(fname=DATA_PATH, delimiter=',')
    # Last column holds the class label, everything before it the features.
    expected_x = raw[:, :-1]
    expected_y = raw[:, -1].astype(int)

    loaded_x, loaded_y = iris_data()

    assert_array_equal(expected_x, loaded_x)
    assert_array_equal(expected_y, loaded_y)
def test_not_fitted():
    """An unfitted StackingCVClassifier must raise NotFittedError on prediction."""
    np.random.seed(123)
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    sclf = StackingCVClassifier(classifiers=base_learners,
                                use_probas=True,
                                meta_classifier=meta,
                                shuffle=False)

    X, y = iris_data()

    expected_msg = ("This StackingCVClassifier instance is not fitted yet."
                    " Call 'fit' with appropriate arguments"
                    " before using this method.")

    # Each prediction entry point is exercised in turn on the unfitted model.
    for method_name in ('predict', 'predict_proba', 'predict_meta_features'):
        assert_raises(NotFittedError,
                      expected_msg,
                      getattr(sclf, method_name),
                      X)
def test_scoring():
    """Check paired_ttest_kfold_cv with 'accuracy' and 'recall_micro' scoring."""
    features, targets = iris_data()
    logreg = LogisticRegression(random_state=1,
                                solver='liblinear',
                                multi_class='ovr')
    tree = DecisionTreeClassifier(random_state=1)

    # Hold-out sanity check on a 50/50 split before running the paired tests.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.5, random_state=123)
    holdout = [model.fit(X_train, y_train).score(X_test, y_test)
               for model in (logreg, tree)]
    logreg_acc = round(holdout[0], 2)
    tree_acc = round(holdout[1], 2)
    assert logreg_acc == 0.96, logreg_acc
    assert tree_acc == 0.91, tree_acc

    # Both metrics are expected to yield the same statistics here.
    for metric in ('accuracy', 'recall_micro'):
        t, p = paired_ttest_kfold_cv(estimator1=logreg, estimator2=tree,
                                     X=features, y=targets,
                                     scoring=metric, random_seed=1)
        assert round(t, 3) == -1.861, t
        assert round(p, 3) == 0.096, p
def main():
    """Fit SoftmaxRegression on two standardized iris columns and plot the results."""
    from mlxtend.data import iris_data
    from mlxtend.plotting import plot_decision_regions
    import matplotlib.pyplot as plt

    # Loading Data
    features, labels = iris_data()
    features = features[:, [0, 3]]  # sepal length and petal width

    # standardize each of the two selected columns in turn
    for col in (0, 1):
        column = features[:, col]
        features[:, col] = (column - column.mean()) / column.std()

    model = SoftmaxRegression(eta=0.01, epochs=10, minibatches=1,
                              random_seed=0)
    model.fit(features, labels)

    # Decision regions of the fitted model.
    plot_decision_regions(features, labels, clf=model)
    plt.title('Softmax Regression - Gradient Descent')
    plt.show()

    # Recorded cost values plotted against their index.
    iterations = range(len(model.cost_))
    plt.plot(iterations, model.cost_)
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()
def test_scoring():
    """Check paired_ttest_kfold_cv with 'accuracy' and 'f1_macro' scoring."""
    features, targets = iris_data()
    logreg = LogisticRegression(random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    # Hold-out sanity check of both estimators before the paired tests.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25, random_state=123)
    holdout = [model.fit(X_train, y_train).score(X_test, y_test)
               for model in (logreg, tree)]
    assert round(holdout[0], 2) == 0.97
    assert round(holdout[1], 2) == 0.95

    # (scoring name, expected t statistic, expected p value)
    expectations = (
        ('accuracy', -1.861, 0.096),
        ('f1_macro', -1.872, 0.094),
    )
    for metric, expected_t, expected_p in expectations:
        t, p = paired_ttest_kfold_cv(estimator1=logreg, estimator2=tree,
                                     X=features, y=targets,
                                     scoring=metric, random_seed=1)
        assert round(t, 3) == expected_t, t
        assert round(p, 3) == expected_p, p
def test_EnsembleVoteClassifier_gridsearch():
    """Grid-search a soft-voting EnsembleVoteClassifier and compare mean CV scores."""
    members = [
        LogisticRegression(solver='liblinear',
                           multi_class='ovr',
                           random_state=1),
        RandomForestClassifier(random_state=1),
        GaussianNB(),
    ]
    ensemble = EnsembleVoteClassifier(clfs=members, voting='soft')

    search_space = {
        'logisticregression__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [20, 200],
    }

    # Older scikit-learn releases get an explicit iid=False.
    grid_kwargs = dict(estimator=ensemble, param_grid=search_space, cv=5)
    if Version(sklearn_version) < '0.24.1':
        grid_kwargs['iid'] = False
    grid = GridSearchCV(**grid_kwargs)

    X, y = iris_data()
    grid.fit(X, y)

    mean_scores = [round(score, 2)
                   for score in grid.cv_results_['mean_test_score']]
    assert mean_scores == [0.95, 0.96, 0.96, 0.95]
def test_threshold():
    """ecdf on the first iris column at percentile=0.8 returns threshold 6.5 and count 120."""
    features, _ = iris_data()
    first_column = features[:, 0]

    result = ecdf(x=first_column,
                  x_label='sepal length (cm)',
                  percentile=0.8)
    # ecdf returns three values; the first one is not checked here.
    _, threshold, count = result

    assert threshold == 6.5
    assert count == 120
def loadData(standardlize=True):
    """Load the iris data and split it 70/30 into train and test parts.

    Parameters
    ----------
    standardlize : bool
        When true, the features are passed through ``dataStandardlize``
        before splitting.

    Returns
    -------
    tuple
        ``(train_X, test_X, train_y, test_y)``.
    """
    features, labels = iris_data()
    features = dataStandardlize(features) if standardlize else features
    # Fixed random_state keeps the split reproducible across calls.
    return tuple(train_test_split(features, labels,
                                  test_size=0.3, random_state=87))
def test_iris_data_r():
    """iris_data(version='corrected') must match the raw file with two patched rows.

    The features are compared after overwriting rows 34 and 37 of the raw
    data with the corrected values; the class labels are compared as well,
    mirroring the check done for the default data set.
    """
    tmp = np.genfromtxt(fname=DATA_PATH, delimiter=',')
    original_r_data_x, original_r_data_y = tmp[:, :-1], tmp[:, -1]
    original_r_data_y = original_r_data_y.astype(int)

    # Apply the two row corrections that the 'corrected' version is
    # expected to contain.
    original_r_data_x[34] = [4.9, 3.1, 1.5, 0.2]
    original_r_data_x[37] = [4.9, 3.6, 1.4, 0.1]

    iris_x, iris_y = iris_data(version='corrected')

    assert_array_equal(original_r_data_x, iris_x)
    # Previously the labels were loaded but never verified.
    assert_array_equal(original_r_data_y, iris_y)
def test_pass_pca_corr_pca_out():
    """plot_pca_correlation_graph accepts a precomputed projection plus its variances."""
    features, _ = iris_data()

    reducer = PCA(n_components=2)
    projected = reducer.fit_transform(features)
    variances = reducer.explained_variance_

    # Smoke test: the call only has to complete without raising.
    plot_pca_correlation_graph(features,
                               variables_names=['1', '2', '3', '4'],
                               X_pca=projected,
                               explained_variance=variances)
def loadDataBinary(standardlize=True):
    """Load iris classes 0 and 1 as a -1/+1 problem and split 70/30.

    Samples with label 2 are dropped and label 0 is rewritten to -1.

    Parameters
    ----------
    standardlize : bool
        When true, the kept features are passed through
        ``dataStandardlize`` before splitting.

    Returns
    -------
    tuple
        ``(train_X, test_X, train_y, test_y)``.
    """
    all_X, all_y = iris_data()

    # Keep only the samples whose label is not 2.
    keep = all_y != 2
    X = all_X[keep]
    y = all_y[keep]

    if standardlize:
        X = dataStandardlize(X)

    # Relabel class 0 as -1; class 1 stays as is.
    y[y == 0] = -1

    return tuple(train_test_split(X, y, test_size=0.3, random_state=87))
def test_verbose():
    """Fitting a StackingClassifier with verbose=3 must complete without error."""
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    stack = StackingClassifier(classifiers=base_learners,
                               use_probas=True,
                               meta_classifier=meta,
                               verbose=3)

    features, targets = iris_data()
    stack.fit(features, targets)
def test__clf_with_no_proba_fail():
    """create_counterfactual must raise AttributeError for OneRClassifier when a probability is requested."""
    X, y = iris_data()
    model = OneRClassifier()
    model.fit(X, y)

    reference = X[15]

    expected_msg = ("Your `model` does not support "
                    "`predict_proba`. Set `y_desired_proba` "
                    " to `None` to use `predict`instead.")

    # Positional arguments forwarded to create_counterfactual by assert_raises.
    call_args = (reference, 2, model, X, 1., 100, 123)
    assert_raises(AttributeError,
                  expected_msg,
                  create_counterfactual,
                  *call_args)
def test_use_features_in_secondary_predict():
    """Cross-validated accuracy of a StackingClassifier with use_features_in_secondary=True."""
    np.random.seed(123)
    features, targets = iris_data()

    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    stack = StackingClassifier(classifiers=base_learners,
                               use_features_in_secondary=True,
                               meta_classifier=meta)

    fold_scores = cross_val_score(stack, features, targets,
                                  cv=5, scoring='accuracy')
    mean_accuracy = round(fold_scores.mean(), 2)
    assert mean_accuracy == 0.95, mean_accuracy
def loadData(standardlize=True):
    """Load the iris data with one-hot labels and split it 70/30.

    Parameters
    ----------
    standardlize : bool
        When true, the features are passed through ``dataStandardlize``
        before splitting.

    Returns
    -------
    tuple
        ``(train_X, test_X, train_y, test_y)`` where the label parts come
        from ``to_categorical(y, num_classes=3)``.
    """
    features, labels = iris_data()
    encoded_labels = to_categorical(labels, num_classes=3)

    if standardlize:
        features = dataStandardlize(features)

    # Fixed random_state keeps the split reproducible across calls.
    return tuple(train_test_split(features, encoded_labels,
                                  test_size=0.3, random_state=87))
def test_no_X_PCA_but_explained_variance():
    """Passing explained_variance without X_pca must raise ValueError."""
    X, y = iris_data()
    pca = PCA(n_components=2)
    pca.fit(X)
    eigen = pca.explained_variance_

    # Only the call under test sits inside pytest.raises, so a ValueError
    # raised by the setup above cannot satisfy the expectation by accident.
    with pytest.raises(ValueError,
                       match='If `explained variance` is not None, the '
                             '`X_pca` values should not be `None`.'):
        plot_pca_correlation_graph(X,
                                   variables_names=['1', '2', '3', '4'],
                                   X_pca=None,
                                   explained_variance=eigen)
def test_X_PCA_but_no_explained_variance():
    """Passing X_pca without explained_variance must raise ValueError."""
    X, y = iris_data()
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # Only the call under test sits inside pytest.raises, so a ValueError
    # raised by the setup above cannot satisfy the expectation by accident.
    with pytest.raises(
            ValueError,
            match='If `X_pca` is not None, the `explained variance` '
                  'values should not be `None`.'):
        plot_pca_correlation_graph(X,
                                   variables_names=['1', '2', '3', '4'],
                                   X_pca=X_pca,
                                   explained_variance=None)
def test_not_enough_components():
    """A mismatch between projected components and eigenvalues must raise ValueError."""
    s = (
        'Number of principal components must match the number of eigenvalues. Got 2 != 1'
    )

    X, y = iris_data()
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)
    eigen = pca.explained_variance_

    # Only the call under test sits inside pytest.raises, so a ValueError
    # raised by the setup above cannot satisfy the expectation by accident.
    with pytest.raises(ValueError, match=s):
        # Dropping the last eigenvalue leaves 1 value for 2 components.
        plot_pca_correlation_graph(X,
                                   variables_names=['1', '2', '3', '4'],
                                   X_pca=X_pca,
                                   explained_variance=eigen[:-1])
def test_use_features_in_secondary_predict_proba():
    """Check first-class probabilities of a StackingClassifier that also feeds raw features to the meta model."""
    np.random.seed(123)
    features, targets = iris_data()

    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr',
                              random_state=1)
    base_learners = [
        RandomForestClassifier(n_estimators=10, random_state=1),
        GaussianNB(),
    ]
    stack = StackingClassifier(classifiers=base_learners,
                               use_features_in_secondary=True,
                               meta_classifier=meta)
    stack.fit(features, targets)

    # Probabilities of class 0 for the first three samples.
    first_three = features[[0, 1, 2]]
    class0_proba = stack.predict_proba(first_three)[:, 0]

    expected = np.array([0.916, 0.828, 0.889])
    np.testing.assert_almost_equal(class0_proba, expected, decimal=3)
def test__clf_with_no_proba_pass():
    """create_counterfactual works with OneRClassifier when y_desired_proba is None."""
    X, y = iris_data()
    model = OneRClassifier()
    model.fit(X, y)

    reference = X[15]
    counterfactual = create_counterfactual(x_reference=reference,
                                           y_desired=2,
                                           model=model,
                                           X_dataset=X,
                                           y_desired_proba=None,
                                           lammbda=100,
                                           random_seed=123)

    # The reference is predicted as class 0, the counterfactual as class 2.
    reference_row = reference.reshape(1, -1)
    counterfactual_row = counterfactual.reshape(1, -1)
    assert model.predict(reference_row) == 0
    assert model.predict(counterfactual_row) == 2
def test_classifier_defaults():
    """Check paired_ttest_resampled with default scoring for two tree depths."""
    X, y = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_score(model):
        # Fit on the training part and score on the held-out part.
        return model.fit(X_train, y_train).score(X_test, y_test)

    def compare_with(other):
        return paired_ttest_resampled(estimator1=logreg,
                                      estimator2=other,
                                      X=X, y=y,
                                      random_seed=1)

    logreg_acc = holdout_score(logreg)
    tree_acc = holdout_score(tree)
    assert round(logreg_acc, 2) == 0.97
    assert round(tree_acc, 2) == 0.95

    t, p = compare_with(tree)

    # Expected statistics differ between scikit-learn generations.
    if Version(sklearn_version) < Version("0.20"):
        expected_t, expected_p = -1.809, 0.081
    else:
        expected_t, expected_p = -1.702, 0.10
    assert round(t, 3) == expected_t, t
    assert round(p, 3) == expected_p, p

    # change maxdepth of decision tree classifier
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump_acc = holdout_score(stump)
    assert round(stump_acc, 2) == 0.63

    t, p = compare_with(stump)
    assert round(t, 3) == 39.214, t
    assert round(p, 3) == 0.000, p
def test_gridsearch_enumerate_names():
    """Grid-search a StackingCVClassifier that contains the same estimator twice.

    The duplicated random forest is addressed through the enumerated
    parameter prefixes ``randomforestclassifier-1`` and
    ``randomforestclassifier-2``. The test only requires the search to
    fit without raising.
    """
    import inspect

    np.random.seed(123)
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf1, clf2],
                                meta_classifier=meta,
                                shuffle=False)

    params = {'meta_classifier__C': [1.0, 100.0],
              'randomforestclassifier-1__n_estimators': [5, 10],
              'randomforestclassifier-2__n_estimators': [5, 20],
              'use_probas': [True, False]}

    # Newer scikit-learn releases no longer accept `iid`, and passing it
    # unconditionally raises TypeError there. Pass iid=False only when the
    # installed GridSearchCV constructor still has that parameter.
    grid_kwargs = {'estimator': sclf, 'param_grid': params, 'cv': 5}
    if 'iid' in inspect.signature(GridSearchCV).parameters:
        grid_kwargs['iid'] = False
    grid = GridSearchCV(**grid_kwargs)

    X, y = iris_data()
    grid = grid.fit(X, y)
def test_01_loss_tree():
    """Check the 0-1 loss bias-variance decomposition of a decision tree on iris."""
    features, targets = iris_data()
    split = train_test_split(features, targets,
                             test_size=0.3,
                             random_state=123,
                             shuffle=True,
                             stratify=targets)
    X_train, X_test, y_train, y_test = split

    model = DecisionTreeClassifier(random_state=123)
    decomposition = bias_variance_decomp(model,
                                         X_train, y_train,
                                         X_test, y_test,
                                         loss='0-1_loss',
                                         random_seed=123)
    expected_loss, bias, variance = decomposition

    assert round(expected_loss, 3) == 0.062
    assert round(bias, 3) == 0.022
    assert round(variance, 3) == 0.040
def test_use_features_in_secondary_sparse_input_predict():
    """Cross-validate a StackingClassifier (use_features_in_secondary=True) on CSR input."""
    np.random.seed(123)
    features, targets = iris_data()

    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr',
                              random_state=1)
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    stack = StackingClassifier(classifiers=[forest],
                               use_features_in_secondary=True,
                               meta_classifier=meta)

    # The features are handed over as a scipy CSR matrix.
    sparse_features = sparse.csr_matrix(features)
    fold_scores = cross_val_score(stack, sparse_features, targets,
                                  cv=5, scoring='accuracy')
    mean_accuracy = round(fold_scores.mean(), 2)
    assert mean_accuracy == 0.97, mean_accuracy
def test_scoring():
    """Check paired_ttest_resampled with 'accuracy' and 'f1_macro' scoring."""
    features, targets = iris_data()
    logreg = LogisticRegression(multi_class='ovr',
                                solver='liblinear',
                                random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    # Hold-out sanity check of both estimators before the paired tests.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25, random_state=123)
    holdout = [model.fit(X_train, y_train).score(X_test, y_test)
               for model in (logreg, tree)]
    assert round(holdout[0], 2) == 0.97
    assert round(holdout[1], 2) == 0.95

    # (scoring, (t, p) expected for sklearn < 0.20, (t, p) expected otherwise)
    cases = (
        ('accuracy', (-1.809, 0.081), (-1.702, 0.1)),
        ('f1_macro', (-1.690, 0.102), (-1.561, 0.129)),
    )
    for metric, legacy_expected, current_expected in cases:
        t, p = paired_ttest_resampled(estimator1=logreg, estimator2=tree,
                                      X=features, y=targets,
                                      scoring=metric, random_seed=1)
        if Version(sklearn_version) < Version('0.20'):
            expected_t, expected_p = legacy_expected
        else:
            expected_t, expected_p = current_expected
        assert round(t, 3) == expected_t, t
        assert round(p, 3) == expected_p, p
def test__large_lambda():
    """With lammbda=100 the counterfactual must flip class 0 to class 2 with high probability."""
    X, y = iris_data()
    model = LogisticRegression()
    model.fit(X, y)

    reference = X[15]
    counterfactual = create_counterfactual(x_reference=reference,
                                           y_desired=2,
                                           model=model,
                                           X_dataset=X,
                                           y_desired_proba=1.,
                                           lammbda=100,
                                           random_seed=123)

    reference_proba = model.predict_proba(reference.reshape(1, -1))
    assert np.argmax(reference_proba) == 0

    counterfactual_row = counterfactual.reshape(1, -1)
    assert np.argmax(model.predict_proba(counterfactual_row)) == 2

    # Probability assigned to the last class for the counterfactual.
    last_class_proba = model.predict_proba(counterfactual_row).flatten()[-1]
    assert round(last_class_proba, 2) >= 0.96
def test_gridsearch():
    """Grid-search a StackingClassifier and compare the mean CV scores."""
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    stack = StackingClassifier(classifiers=base_learners,
                               meta_classifier=meta)

    search_space = {
        'meta_classifier__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [20, 200],
    }
    grid = GridSearchCV(estimator=stack, param_grid=search_space, cv=5)

    features, targets = iris_data()
    grid.fit(features, targets)

    mean_scores = [round(score, 2)
                   for score in grid.cv_results_['mean_test_score']]
    assert mean_scores == [0.95, 0.97, 0.96, 0.96], mean_scores
def test_EnsembleVoteClassifier_gridsearch_enumerate_names():
    """Grid-search an EnsembleVoteClassifier that contains the same estimator twice.

    The duplicated logistic regression is addressed through the
    enumerated parameter prefixes ``logisticregression-1`` and
    ``logisticregression-2``. The test only requires the search to fit
    without raising.
    """
    import inspect

    clf1 = LogisticRegression(solver='liblinear',
                              multi_class='ovr',
                              random_state=1)
    clf2 = RandomForestClassifier(random_state=1)
    eclf = EnsembleVoteClassifier(clfs=[clf1, clf1, clf2])

    params = {
        'logisticregression-1__C': [1.0, 100.0],
        'logisticregression-2__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [5, 20],
        'voting': ['hard', 'soft']
    }

    # Newer scikit-learn releases no longer accept `iid`, and passing it
    # unconditionally raises TypeError there. Pass iid=False only when the
    # installed GridSearchCV constructor still has that parameter.
    grid_kwargs = {'estimator': eclf, 'param_grid': params, 'cv': 5}
    if 'iid' in inspect.signature(GridSearchCV).parameters:
        grid_kwargs['iid'] = False
    grid = GridSearchCV(**grid_kwargs)

    X, y = iris_data()
    grid = grid.fit(X, y)
def test_train_size():
    """paired_ttest_resampled must raise ValueError when test_size is None."""
    features, targets = iris_data()
    logreg = LogisticRegression()
    tree = DecisionTreeClassifier()

    message = ("train_size must be of type int or float. "
               "Got <class 'NoneType'>.")
    # Python 2 spells the type representation as "<type ...>".
    running_python2 = sys.version_info < (3, 0)
    if running_python2:
        message = message.replace('<class', '<type')

    assert_raises(ValueError,
                  message,
                  paired_ttest_resampled,
                  logreg,
                  tree,
                  features,
                  targets,
                  test_size=None)
def test_train_size():
    """paired_ttest_resampled must raise ValueError when test_size is None."""
    features, targets = iris_data()
    estimators = (
        LogisticRegression(solver='liblinear', multi_class='ovr'),
        DecisionTreeClassifier(),
    )

    message = ("train_size must be of type int or float. "
               "Got <class 'NoneType'>.")
    # Python 2 spells the type representation as "<type ...>".
    running_python2 = sys.version_info < (3, 0)
    if running_python2:
        message = message.replace('<class', '<type')

    assert_raises(ValueError,
                  message,
                  paired_ttest_resampled,
                  estimators[0],
                  estimators[1],
                  features,
                  targets,
                  test_size=None)
def test_gridsearch():
    """Grid-search a StackingClassifier and compare the mean CV scores.

    The search runs over the meta classifier's ``C`` and the random
    forest's ``n_estimators`` with 5-fold cross-validation.
    """
    import inspect

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta)

    params = {'meta_classifier__C': [1.0, 100.0],
              'randomforestclassifier__n_estimators': [20, 200]}

    # Newer scikit-learn releases no longer accept `iid`, and passing it
    # unconditionally raises TypeError there. Pass iid=False only when the
    # installed GridSearchCV constructor still has that parameter.
    grid_kwargs = {'estimator': sclf, 'param_grid': params, 'cv': 5}
    if 'iid' in inspect.signature(GridSearchCV).parameters:
        grid_kwargs['iid'] = False
    grid = GridSearchCV(**grid_kwargs)

    X, y = iris_data()
    grid.fit(X, y)

    mean_scores = [round(s, 2) for s
                   in grid.cv_results_['mean_test_score']]

    assert mean_scores == [0.95, 0.97, 0.96, 0.96], mean_scores
def test_classifier_defaults():
    """Check combined_ftest_5x2cv with default scoring for two tree depths."""
    X, y = iris_data()
    logreg = LogisticRegression(random_state=1,
                                multi_class='ovr',
                                solver='liblinear')
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_score(model):
        # Fit on the training part and score on the held-out part.
        return model.fit(X_train, y_train).score(X_test, y_test)

    def compare_with(other):
        return combined_ftest_5x2cv(estimator1=logreg,
                                    estimator2=other,
                                    X=X, y=y,
                                    random_seed=1)

    logreg_acc = holdout_score(logreg)
    tree_acc = holdout_score(tree)
    assert round(logreg_acc, 2) == 0.97
    assert round(tree_acc, 2) == 0.95

    f, p = compare_with(tree)
    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    # change maxdepth of decision tree classifier
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump_acc = holdout_score(stump)
    assert round(stump_acc, 2) == 0.63

    f, p = compare_with(stump)
    assert round(f, 3) == 34.934, f
    assert round(p, 3) == 0.001, p
def test_classifier_defaults():
    """Check paired_ttest_kfold_cv with default scoring for two tree depths."""
    X, y = iris_data()
    logreg = LogisticRegression(random_state=1,
                                multi_class='ovr',
                                solver='liblinear')
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_score(model):
        # Fit on the training part and score on the held-out part.
        return model.fit(X_train, y_train).score(X_test, y_test)

    def compare_with(other):
        return paired_ttest_kfold_cv(estimator1=logreg,
                                     estimator2=other,
                                     X=X, y=y,
                                     random_seed=1)

    logreg_acc = holdout_score(logreg)
    tree_acc = holdout_score(tree)
    assert round(logreg_acc, 2) == 0.97
    assert round(tree_acc, 2) == 0.95

    t, p = compare_with(tree)
    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # change maxdepth of decision tree classifier
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump_acc = holdout_score(stump)
    assert round(stump_acc, 2) == 0.63

    t, p = compare_with(stump)
    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p
def test_classifier_defaults():
    """Check paired_ttest_5x2cv with default scoring for two tree depths."""
    X, y = iris_data()
    logreg = LogisticRegression(random_state=1)
    tree = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=123)

    def holdout_score(model):
        # Fit on the training part and score on the held-out part.
        return model.fit(X_train, y_train).score(X_test, y_test)

    def compare_with(other):
        return paired_ttest_5x2cv(estimator1=logreg,
                                  estimator2=other,
                                  X=X, y=y,
                                  random_seed=1)

    logreg_acc = holdout_score(logreg)
    tree_acc = holdout_score(tree)
    assert round(logreg_acc, 2) == 0.97
    assert round(tree_acc, 2) == 0.95

    t, p = compare_with(tree)
    assert round(t, 3) == -1.539, t
    assert round(p, 3) == 0.184, p

    # change maxdepth of decision tree classifier
    stump = DecisionTreeClassifier(max_depth=1, random_state=1)
    stump_acc = holdout_score(stump)
    assert round(stump_acc, 2) == 0.63

    t, p = compare_with(stump)
    assert round(t, 3) == 5.386, t
    assert round(p, 3) == 0.003, p
def test_scoring():
    """Check combined_ftest_5x2cv with 'accuracy' and 'f1_macro' scoring."""
    features, targets = iris_data()
    logreg = LogisticRegression(random_state=1,
                                solver='liblinear',
                                multi_class='ovr')
    tree = DecisionTreeClassifier(random_state=1)

    # Hold-out sanity check of both estimators before the F tests.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25, random_state=123)
    holdout = [model.fit(X_train, y_train).score(X_test, y_test)
               for model in (logreg, tree)]
    assert round(holdout[0], 2) == 0.97
    assert round(holdout[1], 2) == 0.95

    def f_test(metric):
        return combined_ftest_5x2cv(estimator1=logreg,
                                    estimator2=tree,
                                    X=features, y=targets,
                                    scoring=metric,
                                    random_seed=1)

    f, p = f_test('accuracy')
    assert round(f, 3) == 1.053, f
    assert round(p, 3) == 0.509, p

    f, p = f_test('f1_macro')
    # Expected statistics differ between scikit-learn generations.
    if Version(sklearn_version) < Version('0.20'):
        expected_f, expected_p = -1.510, 0.191
    else:
        expected_f, expected_p = 1.046, 0.513
    assert round(f, 3) == expected_f, f
    assert round(p, 3) == expected_p, p
# Sebastian Raschka 2014-2016 # mlxtend Machine Learning Library Extensions # Author: Sebastian Raschka <sebastianraschka.com> # # License: BSD 3 clause from mlxtend.tf_classifier import TfSoftmaxRegression from mlxtend.data import iris_data import numpy as np from nose.tools import raises X, y = iris_data() X = X[:, [0, 3]] # sepal length and petal width X_bin = X[0:100] # class 0 and class 1 y_bin = y[0:100] # class 0 and class 1 # standardize X_bin[:, 0] = (X_bin[:, 0] - X_bin[:, 0].mean()) / X_bin[:, 0].std() X_bin[:, 1] = (X_bin[:, 1] - X_bin[:, 1].mean()) / X_bin[:, 1].std() X[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std() X[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std() def test_binary_logistic_regression_gd(): t = np.array([[-0.28, 0.95], [-2.23, 2.4]]) lr = TfSoftmaxRegression(epochs=100, eta=0.5, minibatches=1, random_seed=1)
def test_iris_invalid_choice():
    """An unsupported ``version`` argument must be rejected by iris_data.

    The earlier form called ``iris_data()`` without arguments, which does
    not raise, kept its assertion unreachable inside the ``with`` block,
    and read a ``message`` attribute that Python 3 exceptions do not have.
    """
    # NOTE(review): ValueError and a message mentioning "version" are
    # assumed from how iris_data validates its `version` argument --
    # confirm against the iris_data implementation.
    with pytest.raises(ValueError) as excinfo:
        iris_data(version='wrong-choice')

    # Checked after the block so the assertion actually runs.
    assert 'version' in str(excinfo.value)
def test_import_iris_data():
    """iris_data() returns 150 samples with 4 feature columns and 150 labels."""
    features, labels = iris_data()

    n_rows = features.shape[0]
    n_cols = features.shape[1]
    assert n_rows == 150
    assert n_cols == 4

    print(labels.shape)
    n_labels = labels.shape[0]
    assert n_labels == 150
from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB from sklearn.ensemble import RandomForestClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn import datasets from sklearn.model_selection import GridSearchCV from sklearn.model_selection import KFold from sklearn.model_selection import cross_val_score from sklearn.model_selection import train_test_split from sklearn.metrics import roc_auc_score from sklearn.base import clone from distutils.version import LooseVersion as Version from sklearn import __version__ as sklearn_version X_iris, y_iris = iris_data() X_iris = X_iris[:, 1:3] breast_cancer = datasets.load_breast_cancer() X_breast, y_breast = breast_cancer.data[:, 1:3], breast_cancer.target def test_StackingCVClassifier(): np.random.seed(123) meta = LogisticRegression(multi_class='ovr', solver='liblinear') clf1 = RandomForestClassifier(n_estimators=10) clf2 = GaussianNB() sclf = StackingCVClassifier(classifiers=[clf1, clf2], meta_classifier=meta, shuffle=False)