def test_quantile_attributes():
    """Check ``y_train_leaves_`` and ``y_weights_`` on every estimator.

    Each entry of the module-level ``estimators`` is fitted twice on
    ``X_train, y_train``: first with its current parameters and then again
    after ``bootstrap`` has been set to ``False``.
    """
    for est in estimators:
        est.fit(X_train, y_train)

        # If a sample is not present in a particular tree, that
        # corresponding leaf is marked as -1; those positions must be
        # exactly the positions that carry a zero weight.
        assert_array_equal(
            np.vstack(np.where(est.y_train_leaves_ == -1)),
            np.vstack(np.where(est.y_weights_ == 0))
        )

        # Per tree, the weights should sum up to the number of leaf nodes
        # (nodes whose ``children_left`` entry is -1).
        assert_array_equal(
            np.sum(est.y_weights_, axis=1),
            [sum(tree.tree_.children_left == -1) for tree in est.estimators_]
        )

        # NOTE(review): set_params modifies the shared estimator in place, so
        # bootstrap stays False for anything that uses ``estimators`` after
        # this test -- confirm that this is intended.
        est.set_params(bootstrap=False)
        est.fit(X_train, y_train)
        assert_array_equal(
            np.sum(est.y_weights_, axis=1),
            [sum(tree.tree_.children_left == -1) for tree in est.estimators_]
        )
        # With bootstrap disabled no entry may carry the -1 marker.
        assert_true(np.all(est.y_train_leaves_ != -1))
def test_min_samples_split():
    """Check that no internal node holds fewer than ``min_samples_split`` samples.

    Every ensemble is cloned, configured with ``min_samples_split=4`` and
    ``n_estimators=100``, fitted on ``X, y`` and then inspected tree by tree.
    """
    threshold = 4
    for template in ensembles:
        forest = clone(template)
        forest.set_params(min_samples_split=threshold, n_estimators=100)
        forest.fit(X, y)

        for member in forest.estimators_:
            samples_per_node = member.tree_.n_node_samples
            # A node is a leaf when its ``children_left`` entry is -1; only
            # the remaining (internal) nodes are subject to the threshold.
            is_internal = member.tree_.children_left != -1
            assert_true(np.all(samples_per_node[is_internal] >= threshold))
def check_decision_path(ensemble):
    """Validate the output of ``ensemble.decision_path`` on the module-level ``X``.

    Checks that the indicator has one row per sample and one column per node
    over all trees, that consecutive entries of the second return value
    differ by each tree's node count, and that the leaf reached by every
    sample in every tree appears among that sample's stored column indices.

    Parameters
    ----------
    ensemble : fitted ensemble exposing ``decision_path``, ``apply`` and
        ``estimators_``.
    """
    path_matrix, offsets = ensemble.decision_path(X)
    stored_cols, row_ptr = path_matrix.indices, path_matrix.indptr
    nodes_per_tree = [member.tree_.node_count for member in ensemble.estimators_]

    assert_equal(path_matrix.shape[0], X.shape[0])
    assert_equal(path_matrix.shape[1], sum(nodes_per_tree))
    assert_array_equal(np.diff(offsets), nodes_per_tree)

    # Check that all leaf nodes are in the decision path.  The per-tree leaf
    # ids from ``apply`` are shifted by each tree's starting offset
    # (presumably turning them into column ids of the stacked indicator).
    tree_starts = np.reshape(offsets[:-1], (1, -1))
    shifted_leaves = ensemble.apply(X) + tree_starts
    for row, leaves in enumerate(shifted_leaves):
        begin, end = row_ptr[row], row_ptr[row + 1]
        visited = stored_cols[begin:end]
        assert_true(np.all(np.in1d(leaves, visited)))
def test_tree_forest_equivalence():
    """
    Test that a RandomForestQuantileRegressor fitted with bootstrap=False
    and a DecisionTreeQuantileRegressor agree on the training leaves and on
    the predictions for quantile 10 (compared to 5 decimals).
    """
    common = dict(random_state=0, max_depth=2)

    forest = RandomForestQuantileRegressor(bootstrap=False, **common)
    forest.fit(X_train, y_train)

    single_tree = DecisionTreeQuantileRegressor(**common)
    single_tree.fit(X_train, y_train)

    same_leaves = forest.y_train_leaves_ == single_tree.y_train_leaves_
    assert_true(np.all(same_leaves))

    forest_pred = forest.predict(X_test, quantile=10)
    tree_pred = single_tree.predict(X_test, quantile=10)
    assert_array_almost_equal(forest_pred, tree_pred, 5)
def test_base_forest_quantile():
    """
    Test that every fitted sub-estimator of a quantile forest is an instance
    of the matching quantile tree class.
    """
    rng = np.random.RandomState(0)
    features = rng.randn(10, 1)
    targets = np.linspace(0.0, 100.0, 10)

    # (forest class, class each of its fitted members must be an instance of)
    forest_to_tree = (
        (RandomForestQuantileRegressor, DecisionTreeQuantileRegressor),
        (ExtraTreesQuantileRegressor, ExtraTreeQuantileRegressor),
    )
    for forest_cls, tree_cls in forest_to_tree:
        forest = forest_cls(random_state=0, max_depth=1)
        forest.fit(features, targets)
        for member in forest.estimators_:
            assert_true(isinstance(member, tree_cls))
def test_tree_identical_labels():
    """Check tree structure on constant labels and on a two-level target.

    For a clone of every estimator (``min_samples_split=2``,
    ``max_depth=None``):

    * fitted on 100 samples that all have label 1, the tree must consist of
      a single node holding all 100 samples, whose value is ``[[[100]]]``
      for classifiers and ``[[[1.0]]]`` otherwise;
    * refitted on an evenly spaced 1-D feature whose first 50 targets are
      0.0 and last 50 are 1.0, at least one leaf must hold more than two
      samples.
    """
    rng = np.random.RandomState(0)
    for template in estimators:
        noise = rng.randn(100, 5)
        constant = np.ones(100)
        tree_est = clone(template)
        tree_est.set_params(min_samples_split=2, max_depth=None)
        tree_est.fit(noise, constant)

        assert_equal(tree_est.tree_.n_node_samples, [100])
        expected_value = (
            [[[100]]] if isinstance(tree_est, ClassifierMixin) else [[[1.0]]]
        )
        assert_equal(tree_est.tree_.value, expected_value)

        ramp = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
        two_levels = np.array([0.0] * 50 + [1.0] * 50)
        tree_est.fit(ramp, two_levels)

        # Leaves are the nodes whose ``children_left`` entry is -1.
        is_leaf = tree_est.tree_.children_left == -1
        leaf_sizes = tree_est.tree_.n_node_samples[is_leaf]
        assert_true(np.any(leaf_sizes > 2))
def test_tree_identical_labels():
    """Check every tree of every ensemble on constant and two-level targets.

    For each ensemble:

    * fitted on 100 samples that all have label 1, every member tree must
      consist of a single node holding all 100 samples, whose value is
      ``[[[100]]]`` for classifier members and ``[[[1.0]]]`` otherwise;
    * refitted on an evenly spaced 1-D feature whose first 50 targets are
      0.0 and last 50 are 1.0, every member tree must have at least one
      leaf holding more than two samples.
    """
    rng = np.random.RandomState(0)
    for curr_ensemble in ensembles:
        # Fit a clone rather than the shared module-level object so that
        # this test leaves no fitted state behind for other tests.
        ensemble = clone(curr_ensemble)

        X = rng.randn(100, 5)
        y = np.ones(100)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            assert_equal(est.tree_.n_node_samples, [100])
            if isinstance(est, ClassifierMixin):
                assert_equal(est.tree_.value, [[[100]]])
            else:
                assert_equal(est.tree_.value, [[[1.0]]])

        X = np.reshape(np.linspace(0.0, 1.0, 100), (-1, 1))
        y = np.array([0.0] * 50 + [1.0] * 50)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            # Leaves are the nodes whose ``children_left`` entry is -1.
            leaf_ids = est.tree_.children_left == -1
            assert_true(np.any(est.tree_.n_node_samples[leaf_ids] > 2))
def test_apply():
    """Check that ``apply`` only ever returns leaf node ids.

    Each estimator is cloned and fitted on the data from
    ``load_scaled_boston`` (training targets rounded); the check runs once
    after ``fit`` and once more after a subsequent ``partial_fit`` on the
    same clone, for both the training and the test samples.
    """
    X_train, X_test, y_train, _ = load_scaled_boston()
    y_train = np.round(y_train)

    def children_of_reached_nodes(fitted):
        # For each sample, look up the left child of the node that ``apply``
        # reports; a value of -1 means that node is a leaf.
        on_train = fitted.tree_.children_left[fitted.apply(X_train)]
        on_test = fitted.tree_.children_left[fitted.apply(X_test)]
        return on_train, on_test

    for template in estimators:
        tree_est = clone(template)
        for method_name in ("fit", "partial_fit"):
            getattr(tree_est, method_name)(X_train, y_train)
            for left_children in children_of_reached_nodes(tree_est):
                assert_true(np.all(left_children == -1))
def test_forest_attributes():
    """Check which Mondrian forests expose ``classes_`` and ``n_classes_``.

    The regressor must have neither attribute after ``fit`` nor after a
    following ``partial_fit``; a classifier must have both, whether it was
    trained through ``fit`` or through ``partial_fit`` alone.
    """
    toy_X = [[1, 2, 3], [4, 5, 6]]
    toy_y = [1, 2]
    class_attrs = ("classes_", "n_classes_")

    regressor = MondrianForestRegressor(n_estimators=5, random_state=0)
    regressor.fit(toy_X, toy_y)
    for attr in class_attrs:
        assert_false(hasattr(regressor, attr))
    regressor.partial_fit(toy_X, toy_y)
    for attr in class_attrs:
        assert_false(hasattr(regressor, attr))

    # A fresh classifier is built for each training entry point.
    for method_name in ("fit", "partial_fit"):
        classifier = MondrianForestClassifier(n_estimators=5, random_state=0)
        getattr(classifier, method_name)(toy_X, toy_y)
        for attr in class_attrs:
            assert_true(hasattr(classifier, attr))