def test_node_weights():
    """
    Test the implementation of node_weights.

    Two properties of ``weighted_decision_path`` are checked:

    1. For samples the tree was fitted on, each sample has exactly one
       entry, of weight one, located at the node returned by ``apply``.
    2. For other samples, combining the per-node means and variances
       stored on ``tree_`` with the returned weights reproduces the output
       of ``predict(..., return_std=True)``.
    """
    # NOTE(review): if this is sklearn.datasets.load_boston, it was removed
    # in scikit-learn 1.2 -- confirm the pinned version still provides it.
    boston = load_boston()
    X, y = boston.data, boston.target
    n_train = 100
    n_test = 100
    # First n_train rows are used for fitting, last n_test rows for testing.
    X_train, y_train = X[:n_train], y[:n_train]
    X_test = X[-n_test:]

    # The scaler is fitted on the training rows only and then applied
    # unchanged to the test rows.
    minmax = MinMaxScaler()
    X_train = minmax.fit_transform(X_train)
    X_test = minmax.transform(X_test)

    # Test that when all samples are in the training data all weights
    # should be concentrated at the leaf.
    mtr = MondrianTreeRegressor(random_state=0)
    mtr.fit(X_train, y_train)
    leaf_nodes = mtr.apply(X_train)
    weights_sparse = mtr.weighted_decision_path(X_train)
    assert_array_equal(weights_sparse.data, np.ones(X_train.shape[0]))
    assert_array_equal(weights_sparse.indices, leaf_nodes)
    # One stored entry per sample <=> indptr is 0, 1, ..., n_train.
    assert_array_equal(weights_sparse.indptr, np.arange(n_train + 1))

    # Test prediction using the node_weights function gives similar results
    # to that using the prediction method.
    weights = mtr.weighted_decision_path(X_test)
    node_means = mtr.tree_.mean
    node_variances = mtr.tree_.variance
    variances1 = []
    means1 = []

    # Consecutive indptr values delimit the entries belonging to one sample
    # inside ``indices`` (node ids) and ``data`` (node weights).
    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = weights.data[startptr:endptr]
        curr_means = node_means[curr_nodes]
        curr_var = node_variances[curr_nodes]

        # Weighted first moment.
        means1.append(np.sum(curr_weights * curr_means))
        # Weighted second moment; the squared mean is subtracted after the
        # loop to turn it into a variance.
        variances1.append(np.sum(curr_weights * (curr_var + curr_means**2)))

    means1 = np.array(means1)
    variances1 = np.array(variances1)
    variances1 -= means1**2

    means2, std2 = mtr.predict(X_test, return_std=True)
    assert_array_almost_equal(means1, means2, 5)
    assert_array_almost_equal(variances1, std2**2, 3)
def test_weighted_decision_path_test_regression():
    """
    Check ``weighted_decision_path`` against ``predict`` on test samples.

    A ``MondrianTreeRegressor`` is fitted on the training split returned by
    ``load_scaled_boston()`` (defined outside this block). For every test
    sample, the per-node means and variances stored on ``tree_`` are combined
    using the weights from ``weighted_decision_path``; the resulting mean and
    variance must agree with ``predict(..., return_std=True)`` to 5 and 3
    decimal places respectively.
    """
    # The test targets are not needed by any assertion below.
    X_train, X_test, y_train, _ = load_scaled_boston()

    mtr = MondrianTreeRegressor(random_state=0)
    mtr.fit(X_train, y_train)

    weights = mtr.weighted_decision_path(X_test)
    node_means = mtr.tree_.mean
    node_variances = mtr.tree_.variance
    variances1 = []
    means1 = []

    # Consecutive indptr values delimit the entries belonging to one sample
    # inside ``indices`` (node ids) and ``data`` (node weights).
    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = weights.data[startptr:endptr]
        curr_means = node_means[curr_nodes]
        curr_var = node_variances[curr_nodes]

        # Weighted first moment.
        means1.append(np.sum(curr_weights * curr_means))
        # Weighted second moment; the squared mean is subtracted after the
        # loop to turn it into a variance.
        variances1.append(np.sum(curr_weights * (curr_var + curr_means**2)))

    means1 = np.array(means1)
    variances1 = np.array(variances1)
    variances1 -= means1**2

    means2, std2 = mtr.predict(X_test, return_std=True)
    assert_array_almost_equal(means1, means2, 5)
    assert_array_almost_equal(variances1, std2**2, 3)