예제 #1
0
def _make_estimators(X_train, y_train, y_ml_train):
    # Make estimators that make sense to test various scoring methods
    sensible_regr = DecisionTreeRegressor(random_state=0)
    # some of the regressions scorers require strictly positive input.
    sensible_regr.fit(X_train, y_train + 1)
    sensible_clf = DecisionTreeClassifier(random_state=0)
    sensible_clf.fit(X_train, y_train)
    sensible_ml_clf = DecisionTreeClassifier(random_state=0)
    sensible_ml_clf.fit(X_train, y_ml_train)
    return dict([(name, sensible_regr) for name in REGRESSION_SCORERS] +
                [(name, sensible_clf)
                 for name in CLF_SCORERS] + [(name, sensible_clf)
                                             for name in CLUSTER_SCORERS] +
                [(name, sensible_ml_clf) for name in MULTILABEL_ONLY_SCORERS])
예제 #2
0
def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # for proba scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)
예제 #3
0
def test_ovr_ovo_regressor():
    # test that ovr and ovo work on regressors which don't have a decision_
    # function
    ovr = OneVsRestClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > .9

    ovr = OneVsOneClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > .9
예제 #4
0
def bench_scikit_tree_regressor(X, Y):
    """Benchmark with scikit-learn decision tree regressor"""

    from mrex.tree import DecisionTreeRegressor

    gc.collect()

    # start time
    tstart = datetime.now()
    clf = DecisionTreeRegressor()
    clf.fit(X, Y).predict(X)
    delta = (datetime.now() - tstart)
    # stop time

    scikit_regressor_results.append(delta.seconds +
                                    delta.microseconds / mu_second)
예제 #5
0
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert "friedman_mse" in finding.group()
예제 #6
0
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
        'algorithm': ('SAMME', 'SAMME.R')
    }
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2), 'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
예제 #7
0
def test_precision():

    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert (
                    len(search(r"\.\d+", finding.group()).group()) <=
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert (len(search(r"\.\d+", finding.group()).group()) ==
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert (len(search(r"\.\d+", finding.group()).group()) ==
                             precision + 1)
예제 #8
0
def test_score_sample_weight():

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [
        DecisionTreeClassifier(max_depth=2),
        DecisionTreeRegressor(max_depth=2)
    ]
    sets = [datasets.load_iris(), datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert (est.score(ds.data, ds.target) != est.score(
            ds.data, ds.target,
            sample_weight=sample_weight)), ("Unweighted and weighted scores "
                                            "are unexpectedly equal")
예제 #9
0
"""
print(__doc__)

# Import the necessary modules and libraries
import numpy as np
from mrex.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression model
regr_1 = DecisionTreeRegressor(max_depth=2)
regr_2 = DecisionTreeRegressor(max_depth=5)
regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Plot the results
plt.figure()
plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
예제 #10
0
         [('lr', LinearRegression()),
          ('rf', RandomForestRegressor(n_estimators=5))]))]
)
@pytest.mark.parametrize("drop", [None, 'drop'])
def test_none_estimator_with_weights(X, y, voter, drop):
    # check that an estimator can be set to None and passing some weight
    # regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/13777
    voter.fit(X, y, sample_weight=np.ones(y.shape))
    voter.set_params(lr=drop)
    voter.fit(X, y, sample_weight=np.ones(y.shape))
    y_pred = voter.predict(X)
    assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "estimator",
    [VotingRegressor(
        estimators=[('lr', LinearRegression()),
                    ('tree', DecisionTreeRegressor(random_state=0))]),
     VotingClassifier(
         estimators=[('lr', LogisticRegression(random_state=0)),
                     ('tree', DecisionTreeClassifier(random_state=0))])],
    ids=['VotingRegressor', 'VotingClassifier']
)
def test_check_estimators_voting_estimator(estimator):
    # FIXME: to be removed when meta-estimators can be specified themselves
    # their testing parameters (for required parameters).
    check_estimator(estimator)
    check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)
예제 #11
0
from mrex.ensemble import BaggingRegressor
from mrex.tree import DecisionTreeRegressor

# Settings
n_repeat = 50       # Number of iterations for computing expectations
n_train = 50        # Size of the training set
n_test = 1000       # Size of the test set
noise = 0.1         # Standard deviation of the noise
np.random.seed(0)

# Change this for exploring the bias-variance decomposition of other
# estimators. This should work well for estimators with high variance (e.g.,
# decision trees or KNN), but poorly for estimators with low variance (e.g.,
# linear models).
estimators = [("Tree", DecisionTreeRegressor()),
              ("Bagging(Tree)", BaggingRegressor(DecisionTreeRegressor()))]

n_estimators = len(estimators)


# Generate data
def f(x):
    x = x.ravel()

    return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)


def generate(n_samples, noise, n_repeat=1):
    X = np.random.rand(n_samples) * 10 - 5
    X = np.sort(X)
예제 #12
0
X = X.reshape(-1, 1)

# transform the dataset with KBinsDiscretizer
enc = KBinsDiscretizer(n_bins=10, encode='onehot')
X_binned = enc.fit_transform(X)

# predict with original dataset
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(10, 4))
line = np.linspace(-3, 3, 1000, endpoint=False).reshape(-1, 1)
reg = LinearRegression().fit(X, y)
ax1.plot(line,
         reg.predict(line),
         linewidth=2,
         color='green',
         label="linear regression")
reg = DecisionTreeRegressor(min_samples_split=3, random_state=0).fit(X, y)
ax1.plot(line,
         reg.predict(line),
         linewidth=2,
         color='red',
         label="decision tree")
ax1.plot(X[:, 0], y, 'o', c='k')
ax1.legend(loc="best")
ax1.set_ylabel("Regression output")
ax1.set_xlabel("Input feature")
ax1.set_title("Result before discretization")

# predict with transformed dataset
line_binned = enc.transform(line)
reg = LinearRegression().fit(X_binned, y)
ax2.plot(line,
예제 #13
0
score_simple_imputer = pd.DataFrame()
for strategy in ('mean', 'median'):
    estimator = make_pipeline(
        SimpleImputer(missing_values=np.nan, strategy=strategy), br_estimator)
    score_simple_imputer[strategy] = cross_val_score(
        estimator,
        X_missing,
        y_missing,
        scoring='neg_mean_squared_error',
        cv=N_SPLITS)

# Estimate the score after iterative imputation of the missing values
# with different estimators
estimators = [
    BayesianRidge(),
    DecisionTreeRegressor(max_features='sqrt', random_state=0),
    ExtraTreesRegressor(n_estimators=10, random_state=0),
    KNeighborsRegressor(n_neighbors=15)
]
score_iterative_imputer = pd.DataFrame()
for impute_estimator in estimators:
    estimator = make_pipeline(
        IterativeImputer(random_state=0, estimator=impute_estimator),
        br_estimator)
    score_iterative_imputer[impute_estimator.__class__.__name__] = \
        cross_val_score(
            estimator, X_missing, y_missing, scoring='neg_mean_squared_error',
            cv=N_SPLITS
        )

scores = pd.concat(
예제 #14
0
def test_export_text():
    clf = DecisionTreeClassifier(max_depth=2, random_state=0)
    clf.fit(X, y)

    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- class: 1
    """).lstrip()

    assert export_text(clf) == expected_report
    # testing that leaves at level 1 are not truncated
    assert export_text(clf, max_depth=0) == expected_report
    # testing that the rest of the tree is truncated
    assert export_text(clf, max_depth=10) == expected_report

    expected_report = dedent("""
    |--- b <= 0.00
    |   |--- class: -1
    |--- b >  0.00
    |   |--- class: 1
    """).lstrip()
    assert export_text(clf, feature_names=['a', 'b']) == expected_report

    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- weights: [3.00, 0.00] class: -1
    |--- feature_1 >  0.00
    |   |--- weights: [0.00, 3.00] class: 1
    """).lstrip()
    assert export_text(clf, show_weights=True) == expected_report

    expected_report = dedent("""
    |- feature_1 <= 0.00
    | |- class: -1
    |- feature_1 >  0.00
    | |- class: 1
    """).lstrip()
    assert export_text(clf, spacing=1) == expected_report

    X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]]
    y_l = [-1, -1, -1, 1, 1, 1, 2]
    clf = DecisionTreeClassifier(max_depth=4, random_state=0)
    clf.fit(X_l, y_l)
    expected_report = dedent("""
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- truncated branch of depth 2
    """).lstrip()
    assert export_text(clf, max_depth=0) == expected_report

    X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
    y_mo = [[-1, -1], [-1, -1], [-1, -1], [1, 1], [1, 1], [1, 1]]

    reg = DecisionTreeRegressor(max_depth=2, random_state=0)
    reg.fit(X_mo, y_mo)

    expected_report = dedent("""
    |--- feature_1 <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- feature_1 >  0.0
    |   |--- value: [1.0, 1.0]
    """).lstrip()
    assert export_text(reg, decimals=1) == expected_report
    assert export_text(reg, decimals=1, show_weights=True) == expected_report

    X_single = [[-2], [-1], [-1], [1], [1], [2]]
    reg = DecisionTreeRegressor(max_depth=2, random_state=0)
    reg.fit(X_single, y_mo)

    expected_report = dedent("""
    |--- first <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- first >  0.0
    |   |--- value: [1.0, 1.0]
    """).lstrip()
    assert export_text(reg, decimals=1,
                       feature_names=['first']) == expected_report
    assert export_text(reg, decimals=1, show_weights=True,
                       feature_names=['first']) == expected_report
예제 #15
0
def test_graphviz_toy():
    # Check correctness of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3,
                                 min_samples_split=2,
                                 criterion="gini",
                                 random_state=2)
    clf.fit(X, y)

    # Test export code
    contents1 = export_graphviz(clf, out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box] ;\n' \
                '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \
                'value = [3, 3]"] ;\n' \
                '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="True"] ;\n' \
                '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="False"] ;\n' \
                '}'

    assert contents1 == contents2

    # Test with feature_names
    contents1 = export_graphviz(clf, feature_names=["feature0", "feature1"],
                                out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box] ;\n' \
                '0 [label="feature0 <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \
                'value = [3, 3]"] ;\n' \
                '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="True"] ;\n' \
                '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="False"] ;\n' \
                '}'

    assert contents1 == contents2

    # Test with class_names
    contents1 = export_graphviz(clf, class_names=["yes", "no"], out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box] ;\n' \
                '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \
                'value = [3, 3]\\nclass = yes"] ;\n' \
                '1 [label="gini = 0.0\\nsamples = 3\\nvalue = [3, 0]\\n' \
                'class = yes"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="True"] ;\n' \
                '2 [label="gini = 0.0\\nsamples = 3\\nvalue = [0, 3]\\n' \
                'class = no"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="False"] ;\n' \
                '}'

    assert contents1 == contents2

    # Test plot_options
    contents1 = export_graphviz(clf, filled=True, impurity=False,
                                proportion=True, special_characters=True,
                                rounded=True, out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box, style="filled, rounded", color="black", ' \
                'fontname=helvetica] ;\n' \
                'edge [fontname=helvetica] ;\n' \
                '0 [label=<X<SUB>0</SUB> &le; 0.0<br/>samples = 100.0%<br/>' \
                'value = [0.5, 0.5]>, fillcolor="#ffffff"] ;\n' \
                '1 [label=<samples = 50.0%<br/>value = [1.0, 0.0]>, ' \
                'fillcolor="#e58139"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="True"] ;\n' \
                '2 [label=<samples = 50.0%<br/>value = [0.0, 1.0]>, ' \
                'fillcolor="#399de5"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="False"] ;\n' \
                '}'

    assert contents1 == contents2

    # Test max_depth
    contents1 = export_graphviz(clf, max_depth=0,
                                class_names=True, out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box] ;\n' \
                '0 [label="X[0] <= 0.0\\ngini = 0.5\\nsamples = 6\\n' \
                'value = [3, 3]\\nclass = y[0]"] ;\n' \
                '1 [label="(...)"] ;\n' \
                '0 -> 1 ;\n' \
                '2 [label="(...)"] ;\n' \
                '0 -> 2 ;\n' \
                '}'

    assert contents1 == contents2

    # Test max_depth with plot_options
    contents1 = export_graphviz(clf, max_depth=0, filled=True,
                                out_file=None, node_ids=True)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box, style="filled", color="black"] ;\n' \
                '0 [label="node #0\\nX[0] <= 0.0\\ngini = 0.5\\n' \
                'samples = 6\\nvalue = [3, 3]", fillcolor="#ffffff"] ;\n' \
                '1 [label="(...)", fillcolor="#C0C0C0"] ;\n' \
                '0 -> 1 ;\n' \
                '2 [label="(...)", fillcolor="#C0C0C0"] ;\n' \
                '0 -> 2 ;\n' \
                '}'

    assert contents1 == contents2

    # Test multi-output with weighted samples
    clf = DecisionTreeClassifier(max_depth=2,
                                 min_samples_split=2,
                                 criterion="gini",
                                 random_state=2)
    clf = clf.fit(X, y2, sample_weight=w)

    contents1 = export_graphviz(clf, filled=True,
                                impurity=False, out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box, style="filled", color="black"] ;\n' \
                '0 [label="X[0] <= 0.0\\nsamples = 6\\n' \
                'value = [[3.0, 1.5, 0.0]\\n' \
                '[3.0, 1.0, 0.5]]", fillcolor="#ffffff"] ;\n' \
                '1 [label="samples = 3\\nvalue = [[3, 0, 0]\\n' \
                '[3, 0, 0]]", fillcolor="#e58139"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="True"] ;\n' \
                '2 [label="X[0] <= 1.5\\nsamples = 3\\n' \
                'value = [[0.0, 1.5, 0.0]\\n' \
                '[0.0, 1.0, 0.5]]", fillcolor="#f1bd97"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="False"] ;\n' \
                '3 [label="samples = 2\\nvalue = [[0, 1, 0]\\n' \
                '[0, 1, 0]]", fillcolor="#e58139"] ;\n' \
                '2 -> 3 ;\n' \
                '4 [label="samples = 1\\nvalue = [[0.0, 0.5, 0.0]\\n' \
                '[0.0, 0.0, 0.5]]", fillcolor="#e58139"] ;\n' \
                '2 -> 4 ;\n' \
                '}'

    assert contents1 == contents2

    # Test regression output with plot_options
    clf = DecisionTreeRegressor(max_depth=3,
                                min_samples_split=2,
                                criterion="mse",
                                random_state=2)
    clf.fit(X, y)

    contents1 = export_graphviz(clf, filled=True, leaves_parallel=True,
                                out_file=None, rotate=True, rounded=True)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box, style="filled, rounded", color="black", ' \
                'fontname=helvetica] ;\n' \
                'graph [ranksep=equally, splines=polyline] ;\n' \
                'edge [fontname=helvetica] ;\n' \
                'rankdir=LR ;\n' \
                '0 [label="X[0] <= 0.0\\nmse = 1.0\\nsamples = 6\\n' \
                'value = 0.0", fillcolor="#f2c09c"] ;\n' \
                '1 [label="mse = 0.0\\nsamples = 3\\nvalue = -1.0", ' \
                'fillcolor="#ffffff"] ;\n' \
                '0 -> 1 [labeldistance=2.5, labelangle=-45, ' \
                'headlabel="True"] ;\n' \
                '2 [label="mse = 0.0\\nsamples = 3\\nvalue = 1.0", ' \
                'fillcolor="#e58139"] ;\n' \
                '0 -> 2 [labeldistance=2.5, labelangle=45, ' \
                'headlabel="False"] ;\n' \
                '{rank=same ; 0} ;\n' \
                '{rank=same ; 1; 2} ;\n' \
                '}'

    assert contents1 == contents2

    # Test classifier with degraded learning set
    clf = DecisionTreeClassifier(max_depth=3)
    clf.fit(X, y_degraded)

    contents1 = export_graphviz(clf, filled=True, out_file=None)
    contents2 = 'digraph Tree {\n' \
                'node [shape=box, style="filled", color="black"] ;\n' \
                '0 [label="gini = 0.0\\nsamples = 6\\nvalue = 6.0", ' \
                'fillcolor="#ffffff"] ;\n' \
                '}'