def test_oob_score_regression(): # Check that oob prediction is a good estimation of the generalization # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(), n_estimators=50, bootstrap=True, oob_score=True, random_state=rng).fit(X_train, y_train) test_score = clf.score(X_test, y_test) assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns( UserWarning, BaggingRegressor(base_estimator=DecisionTreeRegressor(), n_estimators=1, bootstrap=True, oob_score=True, random_state=rng).fit, X_train, y_train)
def test_bagging_regressor_with_missing_inputs(): # Check that BaggingRegressor can accept X with missing/infinite data X = np.array([ [1, 3, 5], [2, None, 6], [2, np.nan, 6], [2, np.inf, 6], [2, np.NINF, 6], ]) y_values = [ np.array([2, 3, 3, 3, 3]), np.array([ [2, 1, 9], [3, 6, 8], [3, 6, 8], [3, 6, 8], [3, 6, 8], ]) ] for y in y_values: regressor = DecisionTreeRegressor() pipeline = make_pipeline(FunctionTransformer(replace), regressor) pipeline.fit(X, y).predict(X) bagging_regressor = BaggingRegressor(pipeline) y_hat = bagging_regressor.fit(X, y).predict(X) assert y.shape == y_hat.shape # Verify that exceptions can be raised by wrapper regressor regressor = DecisionTreeRegressor() pipeline = make_pipeline(regressor) assert_raises(ValueError, pipeline.fit, X, y) bagging_regressor = BaggingRegressor(pipeline) assert_raises(ValueError, bagging_regressor.fit, X, y)
def test_ovr_ovo_regressor(): # test that ovr and ovo work on regressors which don't have a decision_ # function ovr = OneVsRestClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) assert len(ovr.estimators_) == n_classes assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible assert np.mean(pred == iris.target) > .9 ovr = OneVsOneClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2 assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible assert np.mean(pred == iris.target) > .9
def test_bootstrap_features(): # Test that bootstrapping features may generate duplicate features. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_features=1.0, bootstrap_features=False, random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: assert boston.data.shape[1] == np.unique(features).shape[0] ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_features=1.0, bootstrap_features=True, random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: assert boston.data.shape[1] > np.unique(features).shape[0]
def test_friedman_mse_in_graphviz(): clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0) clf.fit(X, y) dot_data = StringIO() export_graphviz(clf, out_file=dot_data) clf = GradientBoostingClassifier(n_estimators=2, random_state=0) clf.fit(X, y) for estimator in clf.estimators_: export_graphviz(estimator[0], out_file=dot_data) for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()): assert "friedman_mse" in finding.group()
def test_bootstrap_samples(): # Test that bootstrapping samples generate non-perfect base estimators. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) base_estimator = DecisionTreeRegressor().fit(X_train, y_train) # without bootstrap, all trees are perfect on the training set ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_samples=1.0, bootstrap=False, random_state=rng).fit(X_train, y_train) assert (base_estimator.score(X_train, y_train) == ensemble.score(X_train, y_train)) # with bootstrap, trees are no longer perfect on the training set ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_samples=1.0, bootstrap=True, random_state=rng).fit(X_train, y_train) assert (base_estimator.score(X_train, y_train) > ensemble.score( X_train, y_train)) # check that each sampling correspond to a complete bootstrap resample. # the size of each bootstrap should be the same as the input data but # the data should be different (checked using the hash of the data). ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(), bootstrap=True).fit(X_train, y_train) training_hash = [] for estimator in ensemble.estimators_: assert estimator.training_size_ == X_train.shape[0] training_hash.append(estimator.training_hash_) assert len(set(training_hash)) == len(training_hash)
def test_thresholded_scorers(): # Test scorers that take thresholds. X, y = make_blobs(random_state=0, centers=2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = LogisticRegression(random_state=0) clf.fit(X_train, y_train) score1 = get_scorer('roc_auc')(clf, X_test, y_test) score2 = roc_auc_score(y_test, clf.decision_function(X_test)) score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]) assert_almost_equal(score1, score2) assert_almost_equal(score1, score3) logscore = get_scorer('neg_log_loss')(clf, X_test, y_test) logloss = log_loss(y_test, clf.predict_proba(X_test)) assert_almost_equal(-logscore, logloss) # same for an estimator without decision_function clf = DecisionTreeClassifier() clf.fit(X_train, y_train) score1 = get_scorer('roc_auc')(clf, X_test, y_test) score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]) assert_almost_equal(score1, score2) # test with a regressor (no decision_function) reg = DecisionTreeRegressor() reg.fit(X_train, y_train) score1 = get_scorer('roc_auc')(reg, X_test, y_test) score2 = roc_auc_score(y_test, reg.predict(X_test)) assert_almost_equal(score1, score2) # Test that an exception is raised on more than two classes X, y = make_blobs(random_state=0, centers=3) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf.fit(X_train, y_train) with pytest.raises(ValueError, match="multiclass format is not supported"): get_scorer('roc_auc')(clf, X_test, y_test) # test error is raised with a single class present in model # (predict_proba shape is not suitable for binary auc) X, y = make_blobs(random_state=0, centers=2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = DecisionTreeClassifier() clf.fit(X_train, np.zeros_like(y_train)) with pytest.raises(ValueError, match="need classifier with two classes"): get_scorer('roc_auc')(clf, X_test, y_test) # for proba scorers with pytest.raises(ValueError, match="need classifier with two classes"): get_scorer('neg_log_loss')(clf, X_test, y_test)
def _make_estimators(X_train, y_train, y_ml_train): # Make estimators that make sense to test various scoring methods sensible_regr = DecisionTreeRegressor(random_state=0) # some of the regressions scorers require strictly positive input. sensible_regr.fit(X_train, y_train + 1) sensible_clf = DecisionTreeClassifier(random_state=0) sensible_clf.fit(X_train, y_train) sensible_ml_clf = DecisionTreeClassifier(random_state=0) sensible_ml_clf.fit(X_train, y_ml_train) return dict( [(name, sensible_regr) for name in REGRESSION_SCORERS] + [(name, sensible_clf) for name in CLF_SCORERS] + [(name, sensible_clf) for name in CLUSTER_SCORERS] + [(name, sensible_ml_clf) for name in MULTILABEL_ONLY_SCORERS] )
def test_parallel_regression(): # Check parallel regression. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) ensemble = BaggingRegressor(DecisionTreeRegressor(), n_jobs=3, random_state=0).fit(X_train, y_train) ensemble.set_params(n_jobs=1) y1 = ensemble.predict(X_test) ensemble.set_params(n_jobs=2) y2 = ensemble.predict(X_test) assert_array_almost_equal(y1, y2) ensemble = BaggingRegressor(DecisionTreeRegressor(), n_jobs=1, random_state=0).fit(X_train, y_train) y3 = ensemble.predict(X_test) assert_array_almost_equal(y1, y3)
def test_gridsearch(): # Check that base trees can be grid-searched. # AdaBoost classification boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier()) parameters = { 'n_estimators': (1, 2), 'base_estimator__max_depth': (1, 2), 'algorithm': ('SAMME', 'SAMME.R') } clf = GridSearchCV(boost, parameters) clf.fit(iris.data, iris.target) # AdaBoost regression boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(), random_state=0) parameters = {'n_estimators': (1, 2), 'base_estimator__max_depth': (1, 2)} clf = GridSearchCV(boost, parameters) clf.fit(boston.data, boston.target)
def test_base_estimator(): # Check base_estimator and its default values. rng = check_random_state(0) # Classification X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=rng) ensemble = BaggingClassifier(None, n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier) ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier) ensemble = BaggingClassifier(Perceptron(), n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, Perceptron) # Regression X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) ensemble = BaggingRegressor(None, n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor) ensemble = BaggingRegressor(DecisionTreeRegressor(), n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor) ensemble = BaggingRegressor(SVR(), n_jobs=3, random_state=0).fit(X_train, y_train) assert isinstance(ensemble.base_estimator_, SVR)
def test_precision(): rng_reg = RandomState(2) rng_clf = RandomState(8) for X, y, clf in zip( (rng_reg.random_sample((5, 2)), rng_clf.random_sample((1000, 4))), (rng_reg.random_sample((5, )), rng_clf.randint(2, size=(1000, ))), (DecisionTreeRegressor(criterion="friedman_mse", random_state=0, max_depth=1), DecisionTreeClassifier(max_depth=1, random_state=0))): clf.fit(X, y) for precision in (4, 3): dot_data = export_graphviz(clf, out_file=None, precision=precision, proportion=True) # With the current random state, the impurity and the threshold # will have the number of precision set in the export_graphviz # function. We will check the number of precision with a strict # equality. The value reported will have only 2 precision and # therefore, only a less equal comparison will be done. # check value for finding in finditer(r"value = \d+\.\d+", dot_data): assert ( len(search(r"\.\d+", finding.group()).group()) <= precision + 1) # check impurity if is_classifier(clf): pattern = r"gini = \d+\.\d+" else: pattern = r"friedman_mse = \d+\.\d+" # check impurity for finding in finditer(pattern, dot_data): assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1) # check threshold for finding in finditer(r"<= \d+\.\d+", dot_data): assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1)
def test_score_sample_weight(): rng = np.random.RandomState(0) # test both ClassifierMixin and RegressorMixin estimators = [ DecisionTreeClassifier(max_depth=2), DecisionTreeRegressor(max_depth=2) ] sets = [datasets.load_iris(), datasets.load_boston()] for est, ds in zip(estimators, sets): est.fit(ds.data, ds.target) # generate random sample weights sample_weight = rng.randint(1, 10, size=len(ds.target)) # check that the score with and without sample weights are different assert (est.score(ds.data, ds.target) != est.score( ds.data, ds.target, sample_weight=sample_weight)), ("Unweighted and weighted scores " "are unexpectedly equal")
def test_errors(pyplot): X, y_multiclass = make_classification(n_classes=3, n_samples=50, n_informative=3, random_state=0) y_binary = y_multiclass == 0 # Unfitted classifer binary_clf = DecisionTreeClassifier() with pytest.raises(NotFittedError): plot_precision_recall_curve(binary_clf, X, y_binary) binary_clf.fit(X, y_binary) multi_clf = DecisionTreeClassifier().fit(X, y_multiclass) # Fitted multiclass classifier with binary data msg = "DecisionTreeClassifier should be a binary classifier" with pytest.raises(ValueError, match=msg): plot_precision_recall_curve(multi_clf, X, y_binary) reg = DecisionTreeRegressor().fit(X, y_multiclass) msg = "DecisionTreeRegressor should be a binary classifier" with pytest.raises(ValueError, match=msg): plot_precision_recall_curve(reg, X, y_binary)
def test_regression(): # Check regression for various parameter settings. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], random_state=rng) grid = ParameterGrid({ "max_samples": [0.5, 1.0], "max_features": [0.5, 1.0], "bootstrap": [True, False], "bootstrap_features": [True, False] }) for base_estimator in [ None, DummyRegressor(), DecisionTreeRegressor(), KNeighborsRegressor(), SVR() ]: for params in grid: BaggingRegressor(base_estimator=base_estimator, random_state=rng, **params).fit(X_train, y_train).predict(X_test)
def test_export_text(): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- class: 1 """).lstrip() assert export_text(clf) == expected_report # testing that leaves at level 1 are not truncated assert export_text(clf, max_depth=0) == expected_report # testing that the rest of the tree is truncated assert export_text(clf, max_depth=10) == expected_report expected_report = dedent(""" |--- b <= 0.00 | |--- class: -1 |--- b > 0.00 | |--- class: 1 """).lstrip() assert export_text(clf, feature_names=['a', 'b']) == expected_report expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- weights: [3.00, 0.00] class: -1 |--- feature_1 > 0.00 | |--- weights: [0.00, 3.00] class: 1 """).lstrip() assert export_text(clf, show_weights=True) == expected_report expected_report = dedent(""" |- feature_1 <= 0.00 | |- class: -1 |- feature_1 > 0.00 | |- class: 1 """).lstrip() assert export_text(clf, spacing=1) == expected_report X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]] y_l = [-1, -1, -1, 1, 1, 1, 2] clf = DecisionTreeClassifier(max_depth=4, random_state=0) clf.fit(X_l, y_l) expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- truncated branch of depth 2 """).lstrip() assert export_text(clf, max_depth=0) == expected_report X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y_mo = [[-1, -1], [-1, -1], [-1, -1], [1, 1], [1, 1], [1, 1]] reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_mo, y_mo) expected_report = dedent(""" |--- feature_1 <= 0.0 | |--- value: [-1.0, -1.0] |--- feature_1 > 0.0 | |--- value: [1.0, 1.0] """).lstrip() assert export_text(reg, decimals=1) == expected_report assert export_text(reg, decimals=1, show_weights=True) == expected_report X_single = [[-2], [-1], [-1], [1], [1], [2]] reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_single, y_mo) expected_report = dedent(""" |--- first <= 0.0 | |--- value: [-1.0, -1.0] |--- first > 0.0 | |--- value: [1.0, 1.0] """).lstrip() assert export_text(reg, decimals=1, feature_names=['first']) == expected_report assert export_text(reg, decimals=1, show_weights=True, feature_names=['first']) == expected_report
def test_graphviz_toy(): # Check correctness of export_graphviz clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2, criterion="gini", random_state=2) clf.fit(X, y) # Test export code contents1 = export_graphviz(clf, out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box] ;\n' \ '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \ 'value = [3, 3]"] ;\n' \ '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="True"] ;\n' \ '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="False"] ;\n' \ '}' assert contents1 == contents2 # Test with feature_names contents1 = export_graphviz(clf, feature_names=["feature0", "feature1"], out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box] ;\n' \ '0 [label="feature0 <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \ 'value = [3, 3]"] ;\n' \ '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="True"] ;\n' \ '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="False"] ;\n' \ '}' assert contents1 == contents2 # Test with class_names contents1 = export_graphviz(clf, class_names=["yes", "no"], out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box] ;\n' \ '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \ 'value = [3, 3]\\nclass = yes"] ;\n' \ '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]\\n' \ 'class = yes"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="True"] ;\n' \ '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]\\n' \ 'class = no"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="False"] ;\n' \ '}' assert contents1 == contents2 # Test plot_options contents1 = export_graphviz(clf, filled=True, impurity=False, proportion=True, special_characters=True, rounded=True, out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box, style="filled, rounded", color="black", ' \ 'fontname=helvetica] ;\n' \ 'edge [fontname=helvetica] ;\n' \ '0 [label=<X<SUB>0</SUB> ≤ 0.0<br/>samples = 100.0%<br/>' \ 'value = [0.5, 0.5]>, fillcolor="#ffffff"] ;\n' \ '1 [label=<samples = 50.0%<br/>value = [1.0, 0.0]>, ' \ 'fillcolor="#e58139"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="True"] ;\n' \ '2 [label=<samples = 50.0%<br/>value = [0.0, 1.0]>, ' \ 'fillcolor="#399de5"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="False"] ;\n' \ '}' assert contents1 == contents2 # Test max_depth contents1 = export_graphviz(clf, max_depth=0, class_names=True, out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box] ;\n' \ '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \ 'value = [3, 3]\\nclass = y[0]"] ;\n' \ '1 [label="(...)"] ;\n' \ '0 -> 1 ;\n' \ '2 [label="(...)"] ;\n' \ '0 -> 2 ;\n' \ '}' assert contents1 == contents2 # Test max_depth with plot_options contents1 = export_graphviz(clf, max_depth=0, filled=True, out_file=None, node_ids=True) contents2 = 'digraph Tree {\n' \ 'node [shape=box, style="filled", color="black"] ;\n' \ '0 [label="node #0\\nX[0] <= 0.0\\ngini = 0.5\\n' \ 'samples = 6\\nvalue = [3, 3]", fillcolor="#ffffff"] ;\n' \ '1 [label="(...)", fillcolor="#C0C0C0"] ;\n' \ '0 -> 1 ;\n' \ '2 [label="(...)", fillcolor="#C0C0C0"] ;\n' \ '0 -> 2 ;\n' \ '}' assert contents1 == contents2 # Test multi-output with weighted samples clf = DecisionTreeClassifier(max_depth=2, min_samples_split=2, criterion="gini", random_state=2) clf = clf.fit(X, y2, sample_weight=w) contents1 = export_graphviz(clf, filled=True, impurity=False, out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box, style="filled", color="black"] ;\n' \ '0 [label="X[0] <= 0.0\\nsamples = 6\\n' \ 'value = [[3.0, 1.5, 0.0]\\n' \ '[3.0, 1.0, 0.5]]", fillcolor="#ffffff"] ;\n' \ '1 [label="samples = 3\\nvalue = [[3, 0, 0]\\n' \ '[3, 0, 0]]", fillcolor="#e58139"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="True"] ;\n' \ '2 [label="X[0] <= 1.5\\nsamples = 3\\n' \ 'value = [[0.0, 1.5, 0.0]\\n' \ '[0.0, 1.0, 0.5]]", fillcolor="#f1bd97"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="False"] ;\n' \ '3 [label="samples = 2\\nvalue = [[0, 1, 0]\\n' \ '[0, 1, 0]]", fillcolor="#e58139"] ;\n' \ '2 -> 3 ;\n' \ '4 [label="samples = 1\\nvalue = [[0.0, 0.5, 0.0]\\n' \ '[0.0, 0.0, 0.5]]", fillcolor="#e58139"] ;\n' \ '2 -> 4 ;\n' \ '}' assert contents1 == contents2 # Test regression output with plot_options clf = DecisionTreeRegressor(max_depth=3, min_samples_split=2, criterion="mse", random_state=2) clf.fit(X, y) contents1 = export_graphviz(clf, filled=True, leaves_parallel=True, out_file=None, rotate=True, rounded=True) contents2 = 'digraph Tree {\n' \ 'node [shape=box, style="filled, rounded", color="black", ' \ 'fontname=helvetica] ;\n' \ 'graph [ranksep=equally, splines=polyline] ;\n' \ 'edge [fontname=helvetica] ;\n' \ 'rankdir=LR ;\n' \ '0 [label="X[0] <= 0.0\\nmse = 1.0\\nsamples = 6\\n' \ 'value = 0.0", fillcolor="#f2c09c"] ;\n' \ '1 [label="mse = 0.0\\nsamples = 3\\nvalue = -1.0", ' \ 'fillcolor="#ffffff"] ;\n' \ '0 -> 1 [labeldistance=2.5, labelangle=-45, ' \ 'headlabel="True"] ;\n' \ '2 [label="mse = 0.0\\nsamples = 3\\nvalue = 1.0", ' \ 'fillcolor="#e58139"] ;\n' \ '0 -> 2 [labeldistance=2.5, labelangle=45, ' \ 'headlabel="False"] ;\n' \ '{rank=same ; 0} ;\n' \ '{rank=same ; 1; 2} ;\n' \ '}' assert contents1 == contents2 # Test classifier with degraded learning set clf = DecisionTreeClassifier(max_depth=3) clf.fit(X, y_degraded) contents1 = export_graphviz(clf, filled=True, out_file=None) contents2 = 'digraph Tree {\n' \ 'node [shape=box, style="filled", color="black"] ;\n' \ '0 [label="gini = 0.0\\nsamples = 6\\nvalue = 6.0", ' \ 'fillcolor="#ffffff"] ;\n' \ '}'
# regression test for # https://github.com/scikit-learn/scikit-learn/issues/13777 voter = clone(voter) voter.fit(X, y, sample_weight=np.ones(y.shape)) voter.set_params(lr=drop) with pytest.warns(None) as record: voter.fit(X, y, sample_weight=np.ones(y.shape)) assert record if drop is None else not record y_pred = voter.predict(X) assert y_pred.shape == y.shape @pytest.mark.parametrize("estimator", [ VotingRegressor(estimators=[( 'lr', LinearRegression()), ('tree', DecisionTreeRegressor(random_state=0))]), VotingClassifier(estimators=[('lr', LogisticRegression( random_state=0)), ('tree', DecisionTreeClassifier(random_state=0))]) ], ids=['VotingRegressor', 'VotingClassifier']) def test_check_estimators_voting_estimator(estimator): # FIXME: to be removed when meta-estimators can specified themselves # their testing parameters (for required parameters). check_estimator(estimator) check_no_attributes_set_in_init(estimator.__class__.__name__, estimator) # TODO: Remove in 0.24 when None is removed in Voting* @pytest.mark.parametrize("Voter, BaseEstimator", [(VotingClassifier, DecisionTreeClassifier), (VotingRegressor, DecisionTreeRegressor)])