Example #1
0
def test_weighted_decision_path():
    """Check ``weighted_decision_path`` against ``decision_path`` and ``apply``.

    Verifies that:

    * both path methods report the same per-estimator column offsets,
    * the weighted path matrix has one row per sample and one column per
      node summed over all estimators,
    * on the training data the non-zero weights of each estimator sit
      exactly at the leaves returned by ``apply``,
    * on held-out data the weights of each sample add up to 1.0 per
      estimator, i.e. ``n_estimators`` in total.
    """
    # Seed the split as well as the forest so that a failure is reproducible.
    X_train, X_test, y_train, _ = train_test_split(
        X, y, train_size=0.6, test_size=0.4, random_state=0)
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X_train, y_train)

    # decision_path is implemented in sklearn while weighted_decision_path
    # is implemented here, so check that both agree on the column offsets.
    _, col_inds = mr.decision_path(X_train)
    weight_paths, weight_col_inds = mr.weighted_decision_path(X_train)
    assert_array_equal(col_inds, weight_col_inds)

    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(weight_paths.shape[0], X_train.shape[0])
    assert_equal(weight_paths.shape[1], sum(n_nodes))

    # We are calculating the weighted decision path on train data, so
    # the weights should be concentrated at the leaves.
    leaf_indices = mr.apply(X_train)
    for est_ind, curr_leaf_indices in enumerate(leaf_indices.T):
        # Columns [start, stop) of the stacked matrix belong to this estimator.
        start, stop = col_inds[est_ind], col_inds[est_ind + 1]
        curr_path = weight_paths[:, start:stop].toarray()
        assert_array_equal(np.where(curr_path)[1], curr_leaf_indices)

    # Sum of the weights across all the nodes in each estimator
    # for each sample should sum up to 1.0, hence n_estimators overall.
    assert_array_almost_equal(
        np.ravel(mr.weighted_decision_path(X_test)[0].sum(axis=1)),
        mr.n_estimators * np.ones(X_test.shape[0]), 5)
Example #2
0
def test_decision_path():
    """Check the shape and contents of ``decision_path`` on the fitted data.

    Verifies that the indicator matrix has one row per sample and one
    column per node summed over all estimators, that consecutive column
    offsets differ by each estimator's node count, and that the leaf
    reached by every sample in every estimator is part of that sample's
    decision path.
    """
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X, y)
    indicator, col_inds = mr.decision_path(X)
    indices, indptr = indicator.indices, indicator.indptr

    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(indicator.shape[0], X.shape[0])
    assert_equal(indicator.shape[1], sum(n_nodes))
    assert_array_equal(np.diff(col_inds), n_nodes)

    # Check that all leaf nodes are in the decision path.
    # apply() yields per-estimator leaf ids; adding each estimator's column
    # offset maps them onto the columns of the stacked indicator matrix.
    leaf_indices = mr.apply(X) + np.reshape(col_inds[:-1], (1, -1))
    for sample_ind, curr_leaf in enumerate(leaf_indices):
        # Column ids stored for this sample's row (the indptr-by-row
        # indexing assumes a CSR layout -- TODO confirm).
        sample_indices = indices[indptr[sample_ind]:indptr[sample_ind + 1]]
        # NOTE(review): np.in1d is deprecated since NumPy 2.0; np.isin is the
        # replacement once the minimum supported NumPy version allows it.
        assert_true(np.all(np.in1d(curr_leaf, sample_indices)))