def test_friedman_mse_in_graphviz():
    """Check that graphviz exports name the ``friedman_mse`` criterion.

    A regressor fitted with ``criterion="friedman_mse"`` and every first
    estimator of each gradient boosting stage are exported into one shared
    buffer. Each bracketed span of that output that mentions ``samples``
    must also contain ``friedman_mse``.
    """
    buffer = StringIO()

    regressor = DecisionTreeRegressor(criterion="friedman_mse",
                                      random_state=0)
    regressor.fit(X, y)
    export_graphviz(regressor, out_file=buffer)

    booster = GradientBoostingClassifier(n_estimators=2, random_state=0)
    booster.fit(X, y)
    for stage in booster.estimators_:
        # Only the first estimator of each boosting stage is exported.
        export_graphviz(stage[0], out_file=buffer)

    exported = buffer.getvalue()
    for match in finditer(r"\[.*?samples.*?\]", exported):
        node_text = match.group()
        assert "friedman_mse" in node_text
def test_thresholded_scorers():
    """Test scorers that take thresholds.

    The ``'roc_auc'`` and ``'neg_log_loss'`` scorers are compared with direct
    calls to ``roc_auc_score`` and ``log_loss`` for several estimators, then
    the error paths for multiclass targets and for a model fitted on a single
    class are checked.
    """
    auc_scorer = get_scorer('roc_auc')
    neg_log_loss_scorer = get_scorer('neg_log_loss')

    # Binary problem scored with a classifier exposing decision_function.
    features, labels = make_blobs(random_state=0, centers=2)
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        features, labels, random_state=0
    )
    logistic = LogisticRegression(random_state=0)
    logistic.fit(X_fit, y_fit)

    from_scorer = auc_scorer(logistic, X_eval, y_eval)
    from_decision = roc_auc_score(y_eval, logistic.decision_function(X_eval))
    from_proba = roc_auc_score(y_eval, logistic.predict_proba(X_eval)[:, 1])
    assert_almost_equal(from_scorer, from_decision)
    assert_almost_equal(from_scorer, from_proba)

    # The scorer reports the negated log loss.
    neg_loss = neg_log_loss_scorer(logistic, X_eval, y_eval)
    expected_loss = log_loss(y_eval, logistic.predict_proba(X_eval))
    assert_almost_equal(-neg_loss, expected_loss)

    # same for an estimator without decision_function
    tree_clf = DecisionTreeClassifier()
    tree_clf.fit(X_fit, y_fit)
    from_scorer = auc_scorer(tree_clf, X_eval, y_eval)
    from_proba = roc_auc_score(y_eval, tree_clf.predict_proba(X_eval)[:, 1])
    assert_almost_equal(from_scorer, from_proba)

    # test with a regressor (no decision_function)
    tree_reg = DecisionTreeRegressor()
    tree_reg.fit(X_fit, y_fit)
    from_scorer = auc_scorer(tree_reg, X_eval, y_eval)
    from_predict = roc_auc_score(y_eval, tree_reg.predict(X_eval))
    assert_almost_equal(from_scorer, from_predict)

    # Test that an exception is raised on more than two classes; the tree
    # classifier fitted above is refitted on the three-class data.
    features, labels = make_blobs(random_state=0, centers=3)
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        features, labels, random_state=0
    )
    tree_clf.fit(X_fit, y_fit)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        auc_scorer(tree_clf, X_eval, y_eval)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    features, labels = make_blobs(random_state=0, centers=2)
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        features, labels, random_state=0
    )
    single_class_clf = DecisionTreeClassifier()
    single_class_clf.fit(X_fit, np.zeros_like(y_fit))
    # The roc_auc scorer is checked first, then the proba scorer.
    for scorer in (auc_scorer, neg_log_loss_scorer):
        with pytest.raises(ValueError,
                           match="need classifier with two classes"):
            scorer(single_class_clf, X_eval, y_eval)
def _make_estimators(X_train, y_train, y_ml_train):
    """Make estimators that make sense to test various scoring methods.

    Parameters
    ----------
    X_train
        Training features passed to every ``fit`` call.
    y_train
        Targets for the regressor (shifted by one) and for the classifier
        registered under the ``CLF_SCORERS`` and ``CLUSTER_SCORERS`` names.
    y_ml_train
        Targets for the classifier registered under the
        ``MULTILABEL_ONLY_SCORERS`` names (presumably multilabel targets;
        confirm against the caller).

    Returns
    -------
    dict
        Maps each scorer name to the fitted estimator to score with it.
    """
    regressor = DecisionTreeRegressor(random_state=0)
    # some of the regressions scorers require strictly positive input.
    regressor.fit(X_train, y_train + 1)

    classifier = DecisionTreeClassifier(random_state=0)
    classifier.fit(X_train, y_train)

    multilabel_classifier = DecisionTreeClassifier(random_state=0)
    multilabel_classifier.fit(X_train, y_ml_train)

    # Filled in the same order as the original concatenation, so a name
    # listed in several groups ends up with the estimator of the last group.
    estimators = {}
    groups = (
        (REGRESSION_SCORERS, regressor),
        (CLF_SCORERS, classifier),
        (CLUSTER_SCORERS, classifier),
        (MULTILABEL_ONLY_SCORERS, multilabel_classifier),
    )
    for scorer_names, estimator in groups:
        estimators.update(dict.fromkeys(scorer_names, estimator))
    return estimators
def test_export_text():
    """Check the text reports produced by ``export_text``.

    Covers the default report, ``max_depth`` handling, custom
    ``feature_names``, ``show_weights``, ``spacing``, ``decimals``, and
    regressors with multiple outputs or a single feature.

    The comparisons are exact string matches, so the expected reports are
    whitespace-sensitive: one rule per line, nested levels padded with
    ``spacing`` blanks after each ``|``, and the ``>`` branch written with
    two blanks before the threshold so it lines up with ``<=``.
    """
    clf = DecisionTreeClassifier(max_depth=2, random_state=0)
    clf.fit(X, y)

    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- class: 1
    """).lstrip()

    assert export_text(clf) == expected_report
    # testing that leaves at level 1 are not truncated
    assert export_text(clf, max_depth=0) == expected_report
    # a max_depth larger than the fitted tree leaves the report unchanged
    assert export_text(clf, max_depth=10) == expected_report

    # Custom feature names replace the generated "feature_<i>" labels.
    expected_report = dedent("""
    |--- b <= 0.00
    |   |--- class: -1
    |--- b >  0.00
    |   |--- class: 1
    """).lstrip()
    assert export_text(clf, feature_names=['a', 'b']) == expected_report

    # show_weights adds the per-class weights in front of the class.
    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- weights: [3.00, 0.00] class: -1
    |--- feature_1 >  0.00
    |   |--- weights: [0.00, 3.00] class: 1
    """).lstrip()
    assert export_text(clf, show_weights=True) == expected_report

    # spacing=1 shrinks both the padding and the dashes of every level.
    expected_report = dedent("""
    |- feature_1 <= 0.00
    | |- class: -1
    |- feature_1 >  0.00
    | |- class: 1
    """).lstrip()
    assert export_text(clf, spacing=1) == expected_report

    # A deeper tree: with max_depth=0 the non-leaf branch is summarised.
    X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]]
    y_l = [-1, -1, -1, 1, 1, 1, 2]
    clf = DecisionTreeClassifier(max_depth=4, random_state=0)
    clf.fit(X_l, y_l)
    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- truncated branch of depth 2
    """).lstrip()
    assert export_text(clf, max_depth=0) == expected_report

    # Multi-output regressor: leaves list one value per output, and
    # show_weights does not change the report.
    X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
    y_mo = [[-1, -1], [-1, -1], [-1, -1], [1, 1], [1, 1], [1, 1]]

    reg = DecisionTreeRegressor(max_depth=2, random_state=0)
    reg.fit(X_mo, y_mo)

    expected_report = dedent("""
    |--- feature_1 <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- feature_1 >  0.0
    |   |--- value: [1.0, 1.0]
    """).lstrip()
    assert export_text(reg, decimals=1) == expected_report
    assert export_text(reg, decimals=1, show_weights=True) == expected_report

    # Same targets with a single input feature and an explicit feature name.
    X_single = [[-2], [-1], [-1], [1], [1], [2]]
    reg = DecisionTreeRegressor(max_depth=2, random_state=0)
    reg.fit(X_single, y_mo)

    expected_report = dedent("""
    |--- first <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- first >  0.0
    |   |--- value: [1.0, 1.0]
    """).lstrip()
    assert export_text(reg, decimals=1,
                       feature_names=['first']) == expected_report
    assert export_text(reg, decimals=1, show_weights=True,
                       feature_names=['first']) == expected_report