def test_set_params():
    """Check that ``set_params`` can replace and reconfigure sub-estimators.

    The test covers three things:

    * ``named_estimators`` (before fit) and ``named_estimators_`` (after fit)
      expose the same objects as ``estimators`` / ``estimators_``, both by
      attribute and by key;
    * replacing the ``'nb'`` entry with a random forest through
      ``set_params(nb=...)`` yields an ensemble that predicts like one built
      with that forest from the start, and does not create an ``nb``
      attribute on the ensemble;
    * nested ``<name>__<param>`` keys reach the wrapped estimator.

    ``X`` and ``y`` are not defined here; they are expected to be available
    at module level.
    """
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()

    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    # Unfitted view: entries must be the very objects passed in.
    assert 'lr' in eclf1.named_estimators
    assert eclf1.named_estimators.lr is eclf1.estimators[0][1]
    assert eclf1.named_estimators.lr is eclf1.named_estimators['lr']

    eclf1.fit(X, y)
    # Fitted view: entries must be the objects stored in ``estimators_``.
    assert 'lr' in eclf1.named_estimators_
    assert eclf1.named_estimators_.lr is eclf1.estimators_[0]
    assert eclf1.named_estimators_.lr is eclf1.named_estimators_['lr']

    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    # Swap the estimator registered under 'nb' for the random forest.
    eclf2.set_params(nb=clf2).fit(X, y)
    assert not hasattr(eclf2, 'nb')

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())

    # Nested parameters are addressed as "<estimator name>__<parameter>".
    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)

    assert eclf1.estimators[0][1].get_params()['C'] == 10.0
    assert eclf2.estimators[1][1].get_params()['max_depth'] == 5
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C'])
def test_set_params():
    """Verify estimator replacement and nested updates via ``set_params``.

    Builds two soft-voting ensembles, checks the name-based accessors before
    and after fitting, replaces the ``'nb'`` member of the second ensemble
    with a random forest, and finally updates nested parameters through the
    ``<name>__<param>`` syntax. ``X`` and ``y`` are expected to come from
    module scope.
    """
    log_reg = LogisticRegression(random_state=123, C=1.0)
    forest = RandomForestClassifier(random_state=123, max_depth=None)
    naive_bayes = GaussianNB()

    # --- name-based access on an unfitted ensemble -----------------------
    first = VotingClassifier(
        [('lr', log_reg), ('rf', forest)], voting='soft', weights=[1, 2]
    )
    unfitted_view = first.named_estimators
    assert 'lr' in unfitted_view
    assert unfitted_view.lr is first.estimators[0][1]
    assert unfitted_view.lr is unfitted_view['lr']

    # --- name-based access on the fitted ensemble ------------------------
    first.fit(X, y)
    fitted_view = first.named_estimators_
    assert 'lr' in fitted_view
    assert fitted_view.lr is first.estimators_[0]
    assert fitted_view.lr is fitted_view['lr']

    # --- replace the 'nb' member with the forest -------------------------
    second = VotingClassifier(
        [('lr', log_reg), ('nb', naive_bayes)], voting='soft', weights=[1, 2]
    )
    second.set_params(nb=forest)
    second.fit(X, y)
    assert not hasattr(second, 'nb')

    assert_array_equal(first.predict(X), second.predict(X))
    assert_array_almost_equal(first.predict_proba(X), second.predict_proba(X))

    lr_member, swapped_member = (pair[1] for pair in second.estimators[:2])
    assert_equal(lr_member.get_params(), log_reg.get_params())
    assert_equal(swapped_member.get_params(), forest.get_params())

    # --- nested parameter updates ----------------------------------------
    first.set_params(lr__C=10.0)
    second.set_params(nb__max_depth=5)

    assert first.estimators[0][1].get_params()['C'] == 10.0
    assert second.estimators[1][1].get_params()['max_depth'] == 5

    ensemble_params = first.get_params()
    assert_equal(ensemble_params["lr__C"],
                 ensemble_params["lr"].get_params()['C'])
def test_shallow_planned_nested_list_indiv_operator(self):
    """Shallow ``get_params`` on a lale ``VotingClassifier`` reports ``voting``.

    A ``VotingClassifier`` wrapping a single ``DecisionTreeClassifier`` is
    created without an explicit ``voting`` argument. After the parameters
    returned by ``get_params(deep=False)`` are passed through
    ``self.remove_lale_params``, the ``"voting"`` entry must equal ``"hard"``.
    """
    # Imported locally, as in the original test body.
    from lale.lib.sklearn import DecisionTreeClassifier, VotingClassifier

    clf = VotingClassifier(estimators=[("dtc", DecisionTreeClassifier())])
    params = clf.get_params(deep=False)
    filtered_params = self.remove_lale_params(params)
    # The original line was a bare comparison whose result was discarded,
    # so the test could never fail on a wrong value; assert it explicitly.
    assert filtered_params["voting"] == "hard"
def test_set_estimator_none(drop):
    """VotingClassifier ``set_params`` accepts ``drop`` as an estimator value.

    Parameters
    ----------
    drop : object
        The value assigned to estimators that should be removed. The
        assertions below expect at least one warning to be emitted while
        fitting when ``drop is None`` and no warning at all otherwise.

    Notes
    -----
    Warnings are captured with ``warnings.catch_warnings(record=True)``
    instead of ``pytest.warns(None)``: the latter is deprecated since
    pytest 7 and raises in pytest 8. ``simplefilter("always")`` reproduces
    the filter that ``pytest.warns`` installed, so the recorded list has the
    same truthiness as before. ``X`` and ``y`` are expected to come from
    module scope.
    """
    # Local import: the original block did not reference ``warnings``.
    import warnings

    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
    clf3 = GaussianNB()
    # Reference ensemble: 'rf' is kept but given a weight of 0.
    eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 0, 0.5]).fit(X, y)

    eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 1, 0.5])
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        eclf2.set_params(rf=drop).fit(X, y)
    assert record if drop is None else not record
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    assert dict(eclf2.estimators)["rf"] is drop
    assert len(eclf2.estimators_) == 2
    assert all(isinstance(est, (LogisticRegression, GaussianNB))
               for est in eclf2.estimators_)
    assert eclf2.get_params()["rf"] is drop

    eclf1.set_params(voting='soft').fit(X, y)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        eclf2.set_params(voting='soft').fit(X, y)
    assert record if drop is None else not record
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    # Dropping every estimator must be rejected at fit time.
    msg = 'All estimators are dropped. At least one is required'
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        with pytest.raises(ValueError, match=msg):
            eclf2.set_params(lr=drop, rf=drop, nb=drop).fit(X, y)
    assert record if drop is None else not record

    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[0, 0.5],
                             flatten_transform=False).fit(X1, y1)

    eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[1, 0.5],
                             flatten_transform=False)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        eclf2.set_params(rf=drop).fit(X1, y1)
    assert record if drop is None else not record
    # eclf1 still holds two estimators, eclf2 only one, hence the differing
    # leading dimension of the expected arrays.
    assert_array_almost_equal(eclf1.transform(X1),
                              np.array([[[0.7, 0.3], [0.3, 0.7]],
                                        [[1., 0.], [0., 1.]]]))
    assert_array_almost_equal(eclf2.transform(X1),
                              np.array([[[1., 0.],
                                         [0., 1.]]]))
    eclf1.set_params(voting='hard')
    eclf2.set_params(voting='hard')
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
def test_set_estimator_none():
    """VotingClassifier ``set_params`` should be able to set estimators as None.

    An ensemble whose ``'rf'`` member is set to ``None`` is compared against
    a reference ensemble that keeps ``'rf'`` with a weight of 0, for hard
    voting, soft voting and ``transform``. Setting every member to ``None``
    must raise ``ValueError`` at fit time. ``X`` and ``y`` are expected to
    come from module scope.
    """
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 0, 0.5]).fit(X, y)

    eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 1, 0.5])
    eclf2.set_params(rf=None).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    # Plain ``assert`` instead of ``assert_true``: pytest then reports the
    # compared values on failure, and it matches the other tests here.
    assert dict(eclf2.estimators)["rf"] is None
    assert len(eclf2.estimators_) == 2
    assert all(not isinstance(est, RandomForestClassifier)
               for est in eclf2.estimators_)
    assert eclf2.get_params()["rf"] is None

    eclf1.set_params(voting='soft').fit(X, y)
    eclf2.set_params(voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    msg = ('All estimators are None. At least one is required'
           ' to be a classifier!')
    # ``set_params`` runs eagerly here; only the bound ``fit`` call is
    # executed inside ``assert_raise_message``.
    assert_raise_message(
        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)

    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[0, 0.5],
                             flatten_transform=False).fit(X1, y1)

    eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[1, 0.5],
                             flatten_transform=False)
    eclf2.set_params(rf=None).fit(X1, y1)
    assert_array_almost_equal(eclf1.transform(X1),
                              np.array([[[0.7, 0.3], [0.3, 0.7]],
                                        [[1., 0.], [0., 1.]]]))
    assert_array_almost_equal(eclf2.transform(X1),
                              np.array([[[1., 0.],
                                         [0., 1.]]]))
    eclf1.set_params(voting='hard')
    eclf2.set_params(voting='hard')
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
def test_set_estimator_none():
    """VotingClassifier ``set_params`` should be able to set estimators as None.

    The test fits a reference ensemble in which ``'rf'`` has weight 0 and a
    second ensemble in which ``'rf'`` is replaced by ``None``, then expects
    identical predictions (hard and soft voting) and checks the ``transform``
    output of both. It also checks that an ensemble with every member set to
    ``None`` raises ``ValueError`` when fitted. ``X`` and ``y`` are expected
    to come from module scope.
    """
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 0, 0.5]).fit(X, y)

    eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 1, 0.5])
    eclf2.set_params(rf=None).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    # Bare asserts replace the nose-style ``assert_true`` wrappers so that
    # failures show the offending values and the style matches sibling tests.
    assert dict(eclf2.estimators)["rf"] is None
    assert len(eclf2.estimators_) == 2
    # Generator form: no throwaway list is built just to feed ``all``.
    assert all(not isinstance(est, RandomForestClassifier)
               for est in eclf2.estimators_)
    assert eclf2.get_params()["rf"] is None

    eclf1.set_params(voting='soft').fit(X, y)
    eclf2.set_params(voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    msg = ('All estimators are None. At least one is required'
           ' to be a classifier!')
    # Only ``fit`` is deferred to the helper; ``set_params`` is evaluated
    # while building the argument list.
    assert_raise_message(
        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)

    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[0, 0.5],
                             flatten_transform=False).fit(X1, y1)

    eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[1, 0.5],
                             flatten_transform=False)
    eclf2.set_params(rf=None).fit(X1, y1)
    assert_array_almost_equal(eclf1.transform(X1),
                              np.array([[[0.7, 0.3], [0.3, 0.7]],
                                        [[1., 0.], [0., 1.]]]))
    assert_array_almost_equal(eclf2.transform(X1),
                              np.array([[[1., 0.],
                                         [0., 1.]]]))
    eclf1.set_params(voting='hard')
    eclf2.set_params(voting='hard')
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
def test_set_estimator_drop():
    """Exercise ``VotingClassifier`` when a member is set to ``"drop"``.

    A reference ensemble keeps ``"rf"`` with weight 0; a second ensemble has
    ``"rf"`` replaced by ``"drop"`` through ``set_params``. Both are expected
    to agree on ``predict`` / ``predict_proba``, the ``transform`` outputs
    are compared against fixed arrays, and dropping every member must raise
    ``ValueError`` on fit. ``X`` and ``y`` are expected to come from module
    scope.
    """
    log_reg = LogisticRegression(random_state=123)
    forest = RandomForestClassifier(n_estimators=10, random_state=123)
    bayes = GaussianNB()

    # ---- predict ---------------------------------------------------------
    reference = VotingClassifier(
        estimators=[("lr", log_reg), ("rf", forest), ("nb", bayes)],
        voting="hard",
        weights=[1, 0, 0.5],
    )
    reference.fit(X, y)

    pruned = VotingClassifier(
        estimators=[("lr", log_reg), ("rf", forest), ("nb", bayes)],
        voting="hard",
        weights=[1, 1, 0.5],
    )
    pruned.set_params(rf="drop")
    pruned.fit(X, y)
    assert_array_equal(reference.predict(X), pruned.predict(X))

    # The dropped slot stays in ``estimators`` but is absent once fitted.
    assert dict(pruned.estimators)["rf"] == "drop"
    assert len(pruned.estimators_) == 2
    kept_types = (LogisticRegression, GaussianNB)
    assert all(isinstance(fitted, kept_types) for fitted in pruned.estimators_)
    assert pruned.get_params()["rf"] == "drop"

    # ---- soft voting -----------------------------------------------------
    reference.set_params(voting="soft")
    reference.fit(X, y)
    pruned.set_params(voting="soft")
    pruned.fit(X, y)
    assert_array_equal(reference.predict(X), pruned.predict(X))
    assert_array_almost_equal(
        reference.predict_proba(X), pruned.predict_proba(X)
    )

    # ---- everything dropped ----------------------------------------------
    expected_error = "All estimators are dropped. At least one is required"
    with pytest.raises(ValueError, match=expected_error):
        pruned.set_params(lr="drop", rf="drop", nb="drop").fit(X, y)

    # ---- soft voting transform -------------------------------------------
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])

    reference = VotingClassifier(
        estimators=[("rf", forest), ("nb", bayes)],
        voting="soft",
        weights=[0, 0.5],
        flatten_transform=False,
    )
    reference.fit(X1, y1)

    pruned = VotingClassifier(
        estimators=[("rf", forest), ("nb", bayes)],
        voting="soft",
        weights=[1, 0.5],
        flatten_transform=False,
    )
    pruned.set_params(rf="drop")
    pruned.fit(X1, y1)

    two_member_probas = np.array(
        [[[0.7, 0.3], [0.3, 0.7]], [[1.0, 0.0], [0.0, 1.0]]]
    )
    one_member_probas = np.array([[[1.0, 0.0], [0.0, 1.0]]])
    assert_array_almost_equal(reference.transform(X1), two_member_probas)
    assert_array_almost_equal(pruned.transform(X1), one_member_probas)

    # ---- hard voting transform -------------------------------------------
    reference.set_params(voting="hard")
    pruned.set_params(voting="hard")
    assert_array_equal(reference.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(pruned.transform(X1), np.array([[0], [1]]))
weights = [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0), (0.0, 0.5, 0.5), (0.5, 0.0, 0.5), (0.5, 0.5, 0.0), (1.0/3, 1.0/3, 1.0/3)] param_grid = {} for prefix, orig_param_grid in zip(prefixes, param_grids): param_grid.update(dict(zip(["%s__%s" % (prefix, p) for p in orig_param_grid.keys()], orig_param_grid.values()))) for weight in weights: param_grid.update(dict(weights=[weight])) estimators = [('sgd', sgd_pipeline), ('adaboost', adaboost_pipeline), ('pca', pca_pipeline)] voting_clf = VotingClassifier(estimators=estimators, voting='soft') assert np.all([p in voting_clf.get_params().keys() for p in param_grid.keys()]) voting_grid = grid_search.GridSearchCV(estimator=voting_clf, param_grid=param_grid, n_jobs=-1, scoring='roc_auc') get_ipython().magic('time voting_grid.fit(X=X_train, y=y_train)') weight_name = "_".join(["%i" % (100 * w) for w in weight]) print ("... done %s ..." % weight_name) joblib.dump(voting_grid, "../results/voting_clf_%s.pkl" % weight_name) # ## Reload Estimators # In[18]: weights = [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0),
def test_set_estimator_drop():
    """VotingClassifier ``set_params`` should be able to set estimators as drop.

    An ensemble whose ``'rf'`` member is set to ``'drop'`` is compared with a
    reference ensemble that keeps ``'rf'`` at weight 0. Each fit involving
    ``'drop'`` must emit no warnings, apart from the explicitly ignored
    "tostring" ``DeprecationWarning``.

    Notes
    -----
    Warnings are recorded with ``warnings.catch_warnings(record=True)``
    rather than ``pytest.warns(None)``, which is deprecated since pytest 7
    and raises in pytest 8. ``simplefilter("always")`` reproduces the filter
    that ``pytest.warns`` installed. The ``"ignore"`` filter is added
    afterwards, so it is consulted first and keeps the "tostring" warning
    out of the record, as in the original nested form. ``X`` and ``y`` are
    expected to come from module scope.
    """
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 0, 0.5]).fit(X, y)

    eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 1, 0.5])
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        # scipy 1.3.0 uses tostring which is deprecated in numpy
        warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
        eclf2.set_params(rf='drop').fit(X, y)
    assert not record
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    assert dict(eclf2.estimators)["rf"] == 'drop'
    assert len(eclf2.estimators_) == 2
    assert all(isinstance(est, (LogisticRegression, GaussianNB))
               for est in eclf2.estimators_)
    assert eclf2.get_params()["rf"] == 'drop'

    eclf1.set_params(voting='soft').fit(X, y)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        # scipy 1.3.0 uses tostring which is deprecated in numpy
        warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
        eclf2.set_params(voting='soft').fit(X, y)
    assert not record
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    # Dropping every estimator must be rejected at fit time, silently with
    # respect to warnings (no "tostring" filter here, as in the original).
    msg = 'All estimators are dropped. At least one is required'
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        with pytest.raises(ValueError, match=msg):
            eclf2.set_params(lr='drop', rf='drop', nb='drop').fit(X, y)
    assert not record

    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[0, 0.5],
                             flatten_transform=False).fit(X1, y1)

    eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[1, 0.5],
                             flatten_transform=False)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        # scipy 1.3.0 uses tostring which is deprecated in numpy
        warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
        eclf2.set_params(rf='drop').fit(X1, y1)
    assert not record
    assert_array_almost_equal(eclf1.transform(X1),
                              np.array([[[0.7, 0.3], [0.3, 0.7]],
                                        [[1., 0.], [0., 1.]]]))
    assert_array_almost_equal(eclf2.transform(X1),
                              np.array([[[1., 0.],
                                         [0., 1.]]]))
    eclf1.set_params(voting='hard')
    eclf2.set_params(voting='hard')
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
voting='soft') clf_labels += ['Majority voting'] all_clf = [pipe1, clf2, pipe3, mv_clf] for clf, label in zip(all_clf, clf_labels): scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc') print('Accuracy: %0.2f (+/- %0.2f) [%s]' % (scores.mean(), scores.std(), label)) # Get parameter from VotingClassifier # Need to run the params to get the name of each classifier's params # for example : pipe1__clf__C, pipe1__clf__n_jobs, clf2__max_depth, clf2__criterion, etc print(mv_clf.get_params()) # Use GridSearch to find the best parameters from sklearn.model_selection import GridSearchCV # Find and paste the name with the parameters you would like to run # for example: C -> pipe1__clf__C, Max Depth -> clf2__max_depth params = {'pipe1__clf__C': [0.01, 0.1, 100.0], 'clf2__max_depth': [1, 2]} grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc') grid.fit(X_train, y_train) print('Best parameters: %s' % grid.best_params_) print('Accuracy: %.2f' % grid.best_score_)
axarr[idx[0], idx[1]].set_title(tt) plt.text(-3.5, -4.5, s='Sepal width [standardized]', ha='center', va='center', fontsize=12) plt.text(-10.5, 4.5, s='Petal length [standardized]', ha='center', va='center', fontsize=12, rotation=90) plt.savefig('voting_panel', bbox_inches='tight', dpi=300) plt.show() ############################################ ## 通过网格搜索调整逻辑回归的正则化系数C以及决策树的深度 #?????? mv_clf.get_params() from sklearn.model_selection import GridSearchCV params = {'dt__max_depth': [1, 2], 'lr__clf__C': [0.001, 0.1, 100.0]} grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc') grid.fit(X_train, y_train) cv_keys = ('mean_test_score', 'std_test_score', 'params') for r, _ in enumerate(grid.cv_results_['mean_test_score']): #??????? #r = 0 print("%0.3f +/- %0.2f %r" % (grid.cv_results_[cv_keys[0]][r], grid.cv_results_[cv_keys[1]][r] / 2.0,