Ejemplo n.º 1
0
    def test_var(self,):
        # Verify the variance estimates of RegressionForest in two stages:
        #   1. API consistency -- predict_and_var, predict_var, predict_interval,
        #      predict(interval=True) and prediction_stderr must all agree with
        #      normal-quantile bounds built from the returned mean and variance.
        #   2. Accuracy -- forest predictions are compared with the empirical
        #      mean and variance-of-the-mean computed directly from the outcomes.
        def mean_and_var_of_mean(values):
            # Empirical mean and the variance of that mean (var / sample count).
            return np.mean(values), np.var(values) / values.shape[0]

        settings = self._get_base_config()
        settings.update({
            'honest': True,
            'max_depth': 0,
            'inference': True,
            'n_estimators': 1000,
            'subforest_size': 2,
            'max_samples': .5,
            'n_jobs': 1,
        })
        n_features = 2
        seed = 123
        alpha = .1

        # ---- stage 1: the different prediction entry points must agree ----
        X, y, _ = self._get_regression_data(100, n_features, seed)
        forest = RegressionForest(**settings).fit(X, y)
        mean, var = forest.predict_and_var(X)
        center = mean[:, 0]
        stderr = np.sqrt(var[:, 0, 0])
        # Two-sided (1 - alpha) bounds from the normal quantile function.
        lb = scipy.stats.norm.ppf(alpha / 2, loc=center, scale=stderr).reshape(-1, 1)
        ub = scipy.stats.norm.ppf(1 - alpha / 2, loc=center, scale=stderr).reshape(-1, 1)

        np.testing.assert_allclose(var, forest.predict_var(X))
        interval_lb, interval_ub = forest.predict_interval(X, alpha=alpha)
        np.testing.assert_allclose(lb, interval_lb)
        np.testing.assert_allclose(ub, interval_ub)
        point, point_lb, point_ub = forest.predict(X, interval=True, alpha=alpha)
        np.testing.assert_allclose(mean, point)
        np.testing.assert_allclose(lb, point_lb)
        np.testing.assert_allclose(ub, point_ub)
        np.testing.assert_allclose(stderr, forest.prediction_stderr(X)[:, 0])

        # ---- stage 2a: depth-0 forest versus the overall sample statistics ----
        for n_samples in [10, 100, 1000, 10000]:
            X, y, _ = self._get_regression_data(n_samples, n_features, seed)
            forest = RegressionForest(**settings).fit(X, y)
            # A single query row is enough: only X[:1] is predicted here.
            our_mean, our_var = forest.predict_and_var(X[:1])
            true_mean, true_var = mean_and_var_of_mean(y)
            np.testing.assert_allclose(our_mean, true_mean, atol=0.05)
            np.testing.assert_allclose(our_var, true_var, atol=0.05, rtol=.1)

        # ---- stage 2b: depth-1 forest on the step data ----
        # Predictions are taken for rows whose first feature lies beyond
        # +/- our_thr and compared with the empirical statistics of the rows
        # beyond +/- true_thr.
        settings['max_depth'] = 1
        for n_samples, our_thr, true_thr in [(1000, .5, .25), (10000, .05, .05)]:
            X, y, _ = self._get_step_regression_data(n_samples, n_features, seed)
            forest = RegressionForest(**settings).fit(X, y)
            first_feature = X[:, 0]
            our_pos_mean, our_pos_var = forest.predict_and_var(X[first_feature > our_thr])
            our_neg_mean, our_neg_var = forest.predict_and_var(X[first_feature < -our_thr])
            true_pos_mean, true_pos_var = mean_and_var_of_mean(y[first_feature > true_thr])
            true_neg_mean, true_neg_var = mean_and_var_of_mean(y[first_feature < -true_thr])
            np.testing.assert_allclose(our_pos_mean, true_pos_mean, atol=0.07)
            np.testing.assert_allclose(our_pos_var, true_pos_var, atol=0.0, rtol=.25)
            np.testing.assert_allclose(our_neg_mean, true_neg_mean, atol=0.07)
            np.testing.assert_allclose(our_neg_var, true_neg_var, atol=0.0, rtol=.25)
Ejemplo n.º 2
0
 def test_projection(self, ):
     # Test the projection functionality of forests.
     #
     # A forest is first fitted on the original outcome and its mean/variance
     # predictions are recorded. A second forest is then fitted on the outcome
     # stacked with itself, and its predictions are projected with weights
     # (1/2, 1/2). The projected mean and variance are asserted to be exactly
     # equal to the recorded single-outcome values, and the three projection
     # entry points must agree with one another.
     #
     # NOTE(review): set_printoptions changes NumPy's print settings for the
     # whole process, not just this test.
     np.set_printoptions(precision=10, suppress=True)
     config = self._get_base_config()
     config['honest'] = True
     config['max_depth'] = 0
     config['inference'] = True
     config['n_estimators'] = 100
     config['subforest_size'] = 2
     config['max_samples'] = .5
     config['n_jobs'] = 1
     n_features = 2
     n = 100
     random_state = 123
     # The third return value of the data helper is not needed in this test.
     X, y, _ = self._get_regression_data(n, n_features, random_state)

     # Reference: predictions of a forest fitted on the original outcome.
     forest = RegressionForest(**config).fit(X, y)
     mean, var = forest.predict_and_var(X)
     mean = mean.flatten()
     var = var.flatten()

     # Refit on two horizontally stacked copies of the outcome.
     # presumably y is a column of shape (n, 1), giving (n, 2) -- TODO confirm
     y = np.hstack([y, y])
     forest = RegressionForest(**config).fit(X, y)

     # One row of weights per sample; every weight is 0.5.
     projector = np.ones((X.shape[0], 2)) / 2.0
     mean_proj, var_proj = forest.predict_projection_and_var(X, projector)
     np.testing.assert_array_equal(mean_proj, mean)
     np.testing.assert_array_equal(var_proj, var)
     np.testing.assert_array_equal(
         var_proj, forest.predict_projection_var(X, projector))
     np.testing.assert_array_equal(mean_proj,
                                   forest.predict_projection(X, projector))