def test_weighted_decision_path():
    """Check that ``weighted_decision_path`` agrees with ``decision_path``
    on column layout and distributes a total weight of 1.0 per estimator.
    """
    # y_test is not needed by any assertion below, so drop it.
    X_train, X_test, y_train, _ = train_test_split(
        X, y, train_size=0.6, test_size=0.4)
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X_train, y_train)

    # decision_path is implemented in sklearn while weighted_decision_path
    # is implemented here, so check that the per-estimator column offsets
    # reported by both methods agree.  The path matrix itself is unused.
    _, col_inds = mr.decision_path(X_train)
    weight_paths, weight_col_inds = mr.weighted_decision_path(X_train)
    assert_array_equal(col_inds, weight_col_inds)

    # One column per tree node, one row per training sample.
    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(weight_paths.shape[0], X_train.shape[0])
    assert_equal(weight_paths.shape[1], sum(n_nodes))

    # We are calculating the weighted decision path on train data, so
    # the weights should be concentrated at the leaves: within each
    # estimator's column span, the only nonzero entries for a sample
    # are at the leaf that sample falls into.
    leaf_indices = mr.apply(X_train)
    for est_ind, curr_leaf_indices in enumerate(leaf_indices.T):
        curr_path = weight_paths[
            :, col_inds[est_ind]:col_inds[est_ind + 1]].toarray()
        assert_array_equal(np.where(curr_path)[1], curr_leaf_indices)

    # The weights over all nodes of a single estimator sum to 1.0 for
    # each sample, so the grand total across the forest should equal
    # n_estimators (not 1.0).
    assert_array_almost_equal(
        np.ravel(mr.weighted_decision_path(X_test)[0].sum(axis=1)),
        mr.n_estimators * np.ones(X_test.shape[0]), 5)
def test_decision_path():
    """Check the shape of the ``decision_path`` indicator matrix and that
    every sample's path includes the leaf node it lands in.
    """
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X, y)

    indicator, col_inds = mr.decision_path(X)
    # Only the CSR structure is needed; the stored values are irrelevant
    # for an indicator matrix, so the unused `data` array is not unpacked.
    indices, indptr = indicator.indices, indicator.indptr

    # One column per tree node, one row per sample; col_inds gives the
    # cumulative column offsets, so its diffs are the per-tree node counts.
    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(indicator.shape[0], X.shape[0])
    assert_equal(indicator.shape[1], sum(n_nodes))
    assert_array_equal(np.diff(col_inds), n_nodes)

    # Check that all leaf nodes are in the decision path.  apply() gives
    # per-tree leaf ids; shifting by each tree's column offset maps them
    # into the global column space of the indicator matrix.
    leaf_indices = mr.apply(X) + np.reshape(col_inds[:-1], (1, -1))
    for sample_ind, curr_leaf in enumerate(leaf_indices):
        sample_indices = indices[indptr[sample_ind]:indptr[sample_ind + 1]]
        # np.isin is the modern replacement for the deprecated np.in1d;
        # under np.all the two are equivalent here.
        assert_true(np.all(np.isin(curr_leaf, sample_indices)))