def test_var(self):
    # test that the estimator calculates var correctly
    config = self._get_base_config()
    config['honest'] = True
    config['max_depth'] = 0
    config['inference'] = True
    config['n_estimators'] = 1000
    config['subforest_size'] = 2
    config['max_samples'] = .5
    config['n_jobs'] = 1
    n_features = 2
    # test api
    n = 100
    random_state = 123
    X, y, truth = self._get_regression_data(n, n_features, random_state)
    forest = RegressionForest(**config).fit(X, y)
    alpha = .1
    mean, var = forest.predict_and_var(X)
    lb = scipy.stats.norm.ppf(alpha / 2, loc=mean[:, 0], scale=np.sqrt(var[:, 0, 0])).reshape(-1, 1)
    ub = scipy.stats.norm.ppf(1 - alpha / 2, loc=mean[:, 0], scale=np.sqrt(var[:, 0, 0])).reshape(-1, 1)
    np.testing.assert_allclose(var, forest.predict_var(X))
    lbtest, ubtest = forest.predict_interval(X, alpha=alpha)
    np.testing.assert_allclose(lb, lbtest)
    np.testing.assert_allclose(ub, ubtest)
    meantest, lbtest, ubtest = forest.predict(X, interval=True, alpha=alpha)
    np.testing.assert_allclose(mean, meantest)
    np.testing.assert_allclose(lb, lbtest)
    np.testing.assert_allclose(ub, ubtest)
    np.testing.assert_allclose(np.sqrt(var[:, 0, 0]), forest.prediction_stderr(X)[:, 0])
    # test accuracy: with max_depth=0 the forest prediction should match the sample mean
    # and its variance should match np.var(y) / n
    for n in [10, 100, 1000, 10000]:
        random_state = 123
        X, y, truth = self._get_regression_data(n, n_features, random_state)
        forest = RegressionForest(**config).fit(X, y)
        our_mean, our_var = forest.predict_and_var(X[:1])
        true_mean, true_var = np.mean(y), np.var(y) / y.shape[0]
        np.testing.assert_allclose(our_mean, true_mean, atol=0.05)
        np.testing.assert_allclose(our_var, true_var, atol=0.05, rtol=.1)
    # test accuracy on step regression data with depth-1 trees, comparing against
    # the per-region sample mean and variance away from the step threshold
    for n, our_thr, true_thr in [(1000, .5, .25), (10000, .05, .05)]:
        random_state = 123
        config['max_depth'] = 1
        X, y, truth = self._get_step_regression_data(n, n_features, random_state)
        forest = RegressionForest(**config).fit(X, y)
        posX = X[X[:, 0] > our_thr]
        negX = X[X[:, 0] < -our_thr]
        our_pos_mean, our_pos_var = forest.predict_and_var(posX)
        our_neg_mean, our_neg_var = forest.predict_and_var(negX)
        pos = X[:, 0] > true_thr
        true_pos_mean, true_pos_var = np.mean(y[pos]), np.var(y[pos]) / y[pos].shape[0]
        neg = X[:, 0] < -true_thr
        true_neg_mean, true_neg_var = np.mean(y[neg]), np.var(y[neg]) / y[neg].shape[0]
        np.testing.assert_allclose(our_pos_mean, true_pos_mean, atol=0.07)
        np.testing.assert_allclose(our_pos_var, true_pos_var, atol=0.0, rtol=.25)
        np.testing.assert_allclose(our_neg_mean, true_neg_mean, atol=0.07)
        np.testing.assert_allclose(our_neg_var, true_neg_var, atol=0.0, rtol=.25)
    return
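
# Hedged sketch (illustrative, not part of the original test suite): the interval checks
# above use a normal approximation, where the (1 - alpha) interval is built from the
# normal quantiles around the point estimate with scale equal to the prediction stderr.
# The helper below spells out that construction for any fitted forest exposing
# predict_and_var, assuming the same output layout used in test_var (mean of shape
# (n, d_y), var of shape (n, d_y, d_y)) and the module-level numpy/scipy imports
# already used above.
def _normal_approx_interval_sketch(forest, X, alpha=.1):
    # point estimate and variance estimate from the forest
    mean, var = forest.predict_and_var(X)
    stderr = np.sqrt(var[:, 0, 0])
    # two-sided normal quantiles around the point estimate
    lb = scipy.stats.norm.ppf(alpha / 2, loc=mean[:, 0], scale=stderr)
    ub = scipy.stats.norm.ppf(1 - alpha / 2, loc=mean[:, 0], scale=stderr)
    return lb.reshape(-1, 1), ub.reshape(-1, 1)
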
def test_projection(self):
    # test the projection functionality of forests
    np.set_printoptions(precision=10, suppress=True)
    config = self._get_base_config()
    config['honest'] = True
    config['max_depth'] = 0
    config['inference'] = True
    config['n_estimators'] = 100
    config['subforest_size'] = 2
    config['max_samples'] = .5
    config['n_jobs'] = 1
    n_features = 2
    # test api
    n = 100
    random_state = 123
    X, y, truth = self._get_regression_data(n, n_features, random_state)
    forest = RegressionForest(**config).fit(X, y)
    mean, var = forest.predict_and_var(X)
    mean = mean.flatten()
    var = var.flatten()
    # duplicate the outcome so that averaging the two identical outputs with
    # weights [.5, .5] should recover the single-output mean and variance
    y = np.hstack([y, y])
    truth = np.hstack([truth, truth])
    forest = RegressionForest(**config).fit(X, y)
    projector = np.ones((X.shape[0], 2)) / 2.0
    mean_proj, var_proj = forest.predict_projection_and_var(X, projector)
    np.testing.assert_array_equal(mean_proj, mean)
    np.testing.assert_array_equal(var_proj, var)
    np.testing.assert_array_equal(var_proj, forest.predict_projection_var(X, projector))
    np.testing.assert_array_equal(mean_proj, forest.predict_projection(X, projector))
    return
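
# Hedged sketch (illustrative, not part of the original test suite): the projection API
# is exercised above by duplicating y and averaging the two identical outputs with
# weights [.5, .5]. The helper below states the intended relationship explicitly,
# assuming predict(X) returns per-output point estimates of shape (n, d_y) and that
# predict_projection(X, projector) equals the row-wise inner product of those
# estimates with the projector weights.
def _projection_as_weighted_mean_sketch(forest, X, projector):
    # per-output point estimates, shape (n, d_y)
    mean = forest.predict(X)
    # row-wise weighted combination of the outputs, shape (n,)
    expected = np.einsum('ij,ij->i', mean, projector)
    return expected, forest.predict_projection(X, projector)
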