Beispiel #1
0
def test_node_weights():
    """
    Test the implementation of node_weights.
    """
    rng = np.random.RandomState(0)
    boston = load_boston()
    X, y = boston.data, boston.target
    n_train = 100
    n_test = 100
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[-n_test:], y[-n_test:]

    minmax = MinMaxScaler()
    X_train = minmax.fit_transform(X_train)
    X_test = minmax.transform(X_test)

    # Test that when all samples are in the training data all weights
    # should be concentrated at the leaf.
    mtr = MondrianTreeRegressor(random_state=0)
    mtr.fit(X_train, y_train)
    leaf_nodes = mtr.apply(X_train)
    weights_sparse = mtr.weighted_decision_path(X_train)
    assert_array_equal(weights_sparse.data, np.ones(X_train.shape[0]))
    assert_array_equal(weights_sparse.indices, leaf_nodes)
    assert_array_equal(weights_sparse.indptr, np.arange(n_train + 1))

    # Test prediction using the node_weights function gives similar results
    # to that using the prediction method.
    weights = mtr.weighted_decision_path(X_test)
    node_means = mtr.tree_.mean
    node_variances = mtr.tree_.variance
    variances1 = []
    means1 = []

    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = weights.data[startptr:endptr]
        curr_means = node_means[curr_nodes]
        curr_var = node_variances[curr_nodes]

        means1.append(np.sum(curr_weights * curr_means))
        variances1.append(np.sum(curr_weights * (curr_var + curr_means**2)))

    means1 = np.array(means1)
    variances1 = np.array(variances1)
    variances1 -= means1**2
    means2, std2 = mtr.predict(X_test, return_std=True)
    assert_array_almost_equal(means1, means2, 5)
    assert_array_almost_equal(variances1, std2**2, 3)
Beispiel #2
0
def test_weighted_decision_path_test_regression():
    X_train, X_test, y_train, y_test = load_scaled_boston()
    n_train = X_train.shape[0]
    mtr = MondrianTreeRegressor(random_state=0)
    mtr.fit(X_train, y_train)
    weights = mtr.weighted_decision_path(X_test)
    node_means = mtr.tree_.mean
    node_variances = mtr.tree_.variance
    variances1 = []
    means1 = []

    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = weights.data[startptr:endptr]
        curr_means = node_means[curr_nodes]
        curr_var = node_variances[curr_nodes]

        means1.append(np.sum(curr_weights * curr_means))
        variances1.append(np.sum(curr_weights * (curr_var + curr_means**2)))

    means1 = np.array(means1)
    variances1 = np.array(variances1)
    variances1 -= means1**2
    means2, std2 = mtr.predict(X_test, return_std=True)
    assert_array_almost_equal(means1, means2, 5)
    assert_array_almost_equal(variances1, std2**2, 3)