Exemple #1
0
def _count_leaves_per_tree(forest):
    """Return the number of leaf nodes of every fitted tree in ``forest``."""
    # A node is counted as a leaf when its left-child entry is -1.
    return [
        np.count_nonzero(tree.tree_.children_left == -1)
        for tree in forest.estimators_
    ]


def test_quantile_attributes():
    """
    Check the ``y_train_leaves_`` and ``y_weights_`` attributes that each
    estimator in ``estimators`` exposes after fitting.

    For every estimator the test verifies that:

    * positions where ``y_train_leaves_`` is -1 are exactly the positions
      where ``y_weights_`` is 0;
    * the weights of each tree sum to that tree's number of leaf nodes;
    * after refitting with ``bootstrap=False`` the weight sums still match
      the leaf counts and no entry of ``y_train_leaves_`` is -1.
    """
    for est in estimators:
        est.fit(X_train, y_train)

        # If a sample is not present in a particular tree, that
        # corresponding leaf is marked as -1.
        assert_array_equal(
            np.vstack(np.where(est.y_train_leaves_ == -1)),
            np.vstack(np.where(est.y_weights_ == 0))
        )

        # Should sum up to number of leaf nodes.
        assert_array_equal(
            np.sum(est.y_weights_, axis=1),
            _count_leaves_per_tree(est)
        )

        # NOTE(review): this changes ``bootstrap`` on the estimator object
        # taken from ``estimators`` itself, so the setting persists after
        # this test -- confirm no other test relies on the original value.
        est.set_params(bootstrap=False)
        est.fit(X_train, y_train)
        assert_array_equal(
            np.sum(est.y_weights_, axis=1),
            _count_leaves_per_tree(est)
        )
        # Without bootstrapping no sample should be marked as absent (-1).
        assert_true(np.all(est.y_train_leaves_ != -1))
def test_min_samples_split():
    """
    Check that no internal node of any tree holds fewer samples than
    ``min_samples_split``.

    Each entry of ``ensembles`` is cloned, configured with
    ``min_samples_split=4`` and ``n_estimators=100``, and fitted on ``X, y``.
    """
    threshold = 4
    for template in ensembles:
        forest = clone(template)
        forest.set_params(min_samples_split=threshold, n_estimators=100)
        forest.fit(X, y)
        for member in forest.estimators_:
            samples_per_node = member.tree_.n_node_samples
            # Nodes with a left child (children_left != -1) are internal.
            is_internal = member.tree_.children_left != -1
            enough_samples = samples_per_node[is_internal] >= threshold
            assert_true(np.all(enough_samples))
def check_decision_path(ensemble):
    """
    Validate the result of ``ensemble.decision_path(X)``.

    Verifies that the indicator has one row per sample of ``X`` and one
    column per node summed over all trees, that consecutive differences of
    the returned column offsets equal the per-tree node counts, and that
    every leaf reported by ``ensemble.apply(X)`` appears among the indices
    stored for the corresponding sample row.
    """
    indicator, col_inds = ensemble.decision_path(X)
    stored_cols = indicator.indices
    row_ptr = indicator.indptr

    nodes_per_tree = [tree.tree_.node_count for tree in ensemble.estimators_]
    assert_equal(indicator.shape[0], X.shape[0])
    assert_equal(indicator.shape[1], sum(nodes_per_tree))
    assert_array_equal(np.diff(col_inds), nodes_per_tree)

    # Check that all leaf nodes are in the decision path.
    # Per-tree leaf ids are shifted by each tree's starting column offset.
    per_tree_leaves = ensemble.apply(X)
    tree_offsets = np.reshape(col_inds[:-1], (1, -1))
    shifted_leaves = per_tree_leaves + tree_offsets
    for row, row_leaves in enumerate(shifted_leaves):
        start, stop = row_ptr[row], row_ptr[row + 1]
        row_path = stored_cols[start:stop]
        assert_true(np.all(np.in1d(row_leaves, row_path)))
Exemple #4
0
def test_tree_forest_equivalence():
    """
    Check that a quantile random forest fitted with ``bootstrap=False`` and
    a quantile decision tree agree.

    Both models use ``random_state=0`` and ``max_depth=2``. Their
    ``y_train_leaves_`` must be equal and their predictions on ``X_test``
    at ``quantile=10`` must be almost equal (precision argument 5).
    """
    shared_params = {"random_state": 0, "max_depth": 2}

    forest = RandomForestQuantileRegressor(bootstrap=False, **shared_params)
    forest.fit(X_train, y_train)

    single_tree = DecisionTreeQuantileRegressor(**shared_params)
    single_tree.fit(X_train, y_train)

    leaves_match = forest.y_train_leaves_ == single_tree.y_train_leaves_
    assert_true(np.all(leaves_match))

    forest_pred = forest.predict(X_test, quantile=10)
    tree_pred = single_tree.predict(X_test, quantile=10)
    assert_array_almost_equal(forest_pred, tree_pred, 5)
Exemple #5
0
def test_base_forest_quantile():
    """
    Check that each quantile forest is built from the matching tree class.

    Every member of ``estimators_`` must be an instance of
    ``DecisionTreeQuantileRegressor`` for the random forest and of
    ``ExtraTreeQuantileRegressor`` for the extra-trees forest.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1)
    y = np.linspace(0.0, 100.0, 10)

    # (forest class, tree class its members are expected to be)
    expected_member_types = (
        (RandomForestQuantileRegressor, DecisionTreeQuantileRegressor),
        (ExtraTreesQuantileRegressor, ExtraTreeQuantileRegressor),
    )
    for forest_cls, tree_cls in expected_member_types:
        forest = forest_cls(random_state=0, max_depth=1)
        forest.fit(X, y)
        for member in forest.estimators_:
            assert_true(isinstance(member, tree_cls))
Exemple #6
0
def test_tree_identical_labels():
    """
    Check tree structure when many samples share the same label.

    For a clone of each entry in ``estimators`` (with
    ``min_samples_split=2`` and ``max_depth=None``):

    * fitting on 100 samples that all have label 1 must give
      ``n_node_samples == [100]`` and a ``value`` of ``[[[100]]]`` for
      classifiers or ``[[[1.0]]]`` otherwise;
    * fitting on 100 evenly spaced 1-D points whose first 50 labels are 0
      and last 50 are 1 must leave at least one leaf with more than 2
      samples.
    """
    rng = np.random.RandomState(0)
    for est in estimators:
        X = rng.randn(100, 5)
        y = np.ones(100)
        model = clone(est)
        model.set_params(min_samples_split=2, max_depth=None)
        model.fit(X, y)
        assert_equal(model.tree_.n_node_samples, [100])
        # Classifiers are expected to store a count, other estimators the
        # label itself.
        is_classifier = isinstance(model, ClassifierMixin)
        expected_value = [[[100]]] if is_classifier else [[[1.0]]]
        assert_equal(model.tree_.value, expected_value)

        X = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
        y = np.repeat([0.0, 1.0], 50)
        model.fit(X, y)
        is_leaf = model.tree_.children_left == -1
        leaf_sizes = model.tree_.n_node_samples[is_leaf]
        assert_true(np.any(leaf_sizes > 2))
def test_tree_identical_labels():
    """
    Check the structure of every tree in an ensemble when many samples
    share the same label.

    For each entry of ``ensembles``:

    * fitting on 100 samples that all have label 1 must give, for every
      tree, ``n_node_samples == [100]`` and a ``value`` of ``[[[100]]]``
      for classifier trees or ``[[[1.0]]]`` otherwise;
    * fitting on 100 evenly spaced 1-D points whose first 50 labels are 0
      and last 50 are 1 must leave, in every tree, at least one leaf with
      more than 2 samples.
    """
    rng = np.random.RandomState(0)
    for ensemble in ensembles:
        X = rng.randn(100, 5)
        y = np.ones(100)
        ensemble.fit(X, y)
        for member in ensemble.estimators_:
            assert_equal(member.tree_.n_node_samples, [100])

            # Classifier trees are expected to store a count, other trees
            # the label itself.
            is_classifier = isinstance(member, ClassifierMixin)
            expected_value = [[[100]]] if is_classifier else [[[1.0]]]
            assert_equal(member.tree_.value, expected_value)

        X = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
        y = np.repeat([0.0, 1.0], 50)
        ensemble.fit(X, y)
        for member in ensemble.estimators_:
            is_leaf = member.tree_.children_left == -1
            leaf_sizes = member.tree_.n_node_samples[is_leaf]
            assert_true(np.any(leaf_sizes > 2))
Exemple #8
0
def test_apply():
    """
    Check that ``apply`` only ever returns leaf nodes.

    For a clone of each entry in ``estimators``, after ``fit`` and again
    after a subsequent ``partial_fit`` on the same training data, every
    node index returned by ``apply`` for the train and test sets must have
    a ``children_left`` entry of -1.
    """
    X_train, X_test, y_train, y_test = load_scaled_boston()
    y_train = np.round(y_train)
    for est in estimators:
        model = clone(est)
        # Run the same leaf checks after each training phase, in order.
        for phase in ("fit", "partial_fit"):
            getattr(model, phase)(X_train, y_train)
            left_children = [
                model.tree_.children_left[model.apply(data)]
                for data in (X_train, X_test)
            ]
            for children in left_children:
                assert_true(np.all(children == -1))
def test_forest_attributes():
    """
    Check which Mondrian forests expose ``classes_`` and ``n_classes_``.

    A regressor must have neither attribute after ``fit`` nor after a
    following ``partial_fit`` on the same instance. A classifier must have
    both, whether a fresh instance is trained with ``fit`` or with
    ``partial_fit``.
    """
    X_toy = [[1, 2, 3], [4, 5, 6]]
    y_toy = [1, 2]
    class_attrs = ("classes_", "n_classes_")

    # The regressor instance is reused: fit first, then partial_fit.
    regressor = MondrianForestRegressor(n_estimators=5, random_state=0)
    for phase in ("fit", "partial_fit"):
        getattr(regressor, phase)(X_toy, y_toy)
        for attr in class_attrs:
            assert_false(hasattr(regressor, attr))

    # Each training method gets its own fresh classifier.
    for phase in ("fit", "partial_fit"):
        classifier = MondrianForestClassifier(n_estimators=5, random_state=0)
        getattr(classifier, phase)(X_toy, y_toy)
        for attr in class_attrs:
            assert_true(hasattr(classifier, attr))