Esempio n. 1
0
 def test_random_state(self):
     """Check that two forests built with the same ``random_state`` agree.

     Two identically configured ``SubsampledHonestForest`` instances are
     fitted on the same data; their point predictions on a shared test
     grid must be equal.
     """
     np.random.seed(123)
     n_samples, n_features = 5000, 5
     grid = np.linspace(-1, 1, 10)
     # First column sweeps [-1, 1]; the remaining columns are random draws.
     X_test = np.hstack([grid.reshape(-1, 1),
                         np.random.normal(size=(10, n_features - 1))])
     X = np.random.normal(0, 1, size=(n_samples, n_features))
     y = X[:, 0] + np.random.normal(0, .1, size=(n_samples, ))

     forest_kwargs = dict(n_estimators=100,
                          max_depth=5,
                          min_samples_leaf=10,
                          verbose=0,
                          random_state=12345)
     predictions = []
     for _ in range(2):
         forest = SubsampledHonestForest(**forest_kwargs)
         forest.fit(X, y)
         predictions.append(forest.predict(X_test))

     # Check that the point estimates are the same
     first, second = predictions
     np.testing.assert_equal(first, second)
Esempio n. 2
0
 def test_dishonest_y1d(self):
     """Check a forest built with ``honest=False`` on a 1-d step-function target.

     For three fresh data draws, point predictions must be within 0.2 of
     the step function ``1{x > 0}`` on the test grid, and the interval
     requested with ``alpha=0.01`` must bracket it up to a 0.05 slack.
     """
     np.random.seed(123)
     n_samples, n_features = 5000, 1
     grid = np.linspace(-1, 1, 10)
     # With a single feature the random block below has zero columns.
     X_test = np.hstack([grid.reshape(-1, 1),
                         np.random.normal(size=(10, n_features - 1))])
     # The expected target depends only on the fixed test grid.
     expected = 1 * (X_test[:, 0] > 0)

     for _ in range(3):
         X = np.random.normal(0, 1, size=(n_samples, n_features))
         noise = np.random.normal(0, .1, size=(n_samples, ))
         y = 1. * (X[:, 0] > 0) + noise
         forest = SubsampledHonestForest(n_estimators=100,
                                         honest=False,
                                         max_depth=3,
                                         min_samples_leaf=10,
                                         verbose=0)
         forest.fit(X, y)
         point = forest.predict(X_test)
         lower, upper = forest.predict_interval(X_test, alpha=0.01)

         np.testing.assert_allclose(point, expected, rtol=0, atol=.2)
         np.testing.assert_array_less(lower, expected + .05)
         np.testing.assert_array_less(expected, upper + .05)
Esempio n. 3
0
 def test_y2d(self):
     """Check point predictions and intervals for a two-column target.

     Both target columns are noisy copies of the first feature. For each
     of the ``'mse'`` and ``'mae'`` criteria, repeated three times, the
     predictions must be within 0.2 of the first test feature and the
     interval requested with ``alpha=0.01`` must bracket it up to a 0.05
     slack.
     """
     np.random.seed(123)
     n_samples, n_features = 5000, 5
     grid = np.linspace(-1, 1, 10)
     X_test = np.hstack([grid.reshape(-1, 1),
                         np.random.normal(size=(10, n_features - 1))])
     # Expected output: the first test feature duplicated into two columns.
     expected = X_test[:, [0, 0]]

     # Same visiting order as three outer rounds over ('mse', 'mae').
     for criterion in ['mse', 'mae'] * 3:
         X = np.random.normal(0, 1, size=(n_samples, n_features))
         y = X[:, [0, 0]] + np.random.normal(0, .1, size=(n_samples, 2))
         forest = SubsampledHonestForest(n_estimators=100,
                                         max_depth=5,
                                         criterion=criterion,
                                         min_samples_leaf=10,
                                         verbose=0)
         forest.fit(X, y)
         point = forest.predict(X_test)
         lower, upper = forest.predict_interval(X_test, alpha=0.01)

         np.testing.assert_allclose(point, expected, rtol=0, atol=.2)
         np.testing.assert_array_less(lower, expected + .05)
         np.testing.assert_array_less(expected, upper + .05)
 def test_nonauto_subsample_fr(self):
     """Check predictions and intervals with an explicit ``subsample_fr=.8``.

     Point predictions must be within 0.2 of the first test feature, and
     the interval requested with ``alpha=0.01`` must bracket it up to a
     0.05 slack.
     """
     np.random.seed(123)
     n_samples, n_features = 5000, 5
     grid = np.linspace(-1, 1, 10)
     extra_cols = np.random.normal(size=(10, n_features - 1))
     X_test = np.hstack([grid.reshape(-1, 1), extra_cols])
     X = np.random.normal(0, 1, size=(n_samples, n_features))
     y = X[:, 0] + np.random.normal(0, .1, size=(n_samples,))

     forest = SubsampledHonestForest(
         n_estimators=100,
         subsample_fr=.8,
         max_depth=5,
         min_samples_leaf=10,
         verbose=0,
     )
     forest.fit(X, y)
     point = forest.predict(X_test)
     lower, upper = forest.predict_interval(X_test, alpha=0.01)

     expected = X_test[:, 0]
     np.testing.assert_allclose(point, expected, rtol=0, atol=.2)
     np.testing.assert_array_less(lower, expected + .05)
     np.testing.assert_array_less(expected, upper + .05)
def monte_carlo():
    """Run a Monte Carlo study of ``SubsampledHonestForest`` and save plots.

    For each of 100 iterations a fresh data set ``y = X[:, 0] + noise`` is
    drawn, a forest with 1000 estimators is fitted, and its point
    predictions and ``alpha=0.05`` intervals are recorded on a fixed test
    grid whose first column sweeps ``[-1, 1]``.

    Three figures are written to ``figures/honestforest``:

    * ``coverage.png`` -- fraction of iterations whose interval contains
      the grid value, per grid point;
    * ``rmse.png`` -- root mean squared error of the point predictions
      against the grid value, per grid point;
    * ``length.png`` -- mean interval length, per grid point.

    The iteration index is printed to stdout as a progress indicator.
    """
    n = 5000
    d = 5
    n_grid = 1000
    x_grid = np.linspace(-1, 1, n_grid)
    X_test = np.hstack(
        [x_grid.reshape(-1, 1),
         np.random.normal(size=(n_grid, d - 1))])
    coverage = []
    exp_dict = {'point': [], 'low': [], 'up': []}
    for it in range(100):
        print(it)
        X = np.random.normal(0, 1, size=(n, d))
        y = X[:, 0] + np.random.normal(size=(n, ))
        est = SubsampledHonestForest(n_estimators=1000, verbose=1)
        est.fit(X, y)
        point = est.predict(X_test)
        low, up = est.predict_interval(X_test, alpha=0.05)
        # The target at each grid point is the grid value itself.
        coverage.append((low <= x_grid) & (x_grid <= up))
        exp_dict['point'].append(point)
        exp_dict['low'].append(low)
        exp_dict['up'].append(up)

    # exist_ok avoids the check-then-create race and creates both directory
    # levels in a single call.
    out_dir = os.path.join('figures', 'honestforest')
    os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure()
    plt.plot(x_grid, np.mean(coverage, axis=0))
    plt.savefig(os.path.join(out_dir, 'coverage.png'))
    # Close each figure once saved so it does not linger in pyplot's
    # global figure registry.
    plt.close(fig)

    fig = plt.figure()
    plt.plot(x_grid,
             np.sqrt(np.mean((np.array(exp_dict['point']) - x_grid)**2,
                             axis=0)),
             label='RMSE')
    # Without a legend the label passed above is never rendered.
    plt.legend()
    plt.savefig(os.path.join(out_dir, 'rmse.png'))
    plt.close(fig)

    fig = plt.figure()
    plt.plot(x_grid,
             np.mean(np.array(exp_dict['up']) - np.array(exp_dict['low']),
                     axis=0),
             label='length')
    plt.legend()
    plt.savefig(os.path.join(out_dir, 'length.png'))
    plt.close(fig)
 def __init__(self,
              model_y, model_t,
              discrete_treatment=False,
              n_crossfit_splits=2,
              n_estimators=100,
              criterion="mse",
              max_depth=None,
              min_samples_split=2,
              min_samples_leaf=1,
              min_weight_fraction_leaf=0.,
              max_features="auto",
              max_leaf_nodes=None,
              min_impurity_decrease=0.,
              subsample_fr='auto',
              honest=True,
              n_jobs=None,
              verbose=0,
              random_state=None):
     """Configure the estimator with a ``SubsampledHonestForest`` final model.

     The forest hyperparameters (``n_estimators`` through ``verbose``, plus
     ``random_state``) are forwarded unchanged to ``SubsampledHonestForest``,
     which becomes ``model_final`` of the parent class. ``model_y``,
     ``model_t`` and ``discrete_treatment`` are passed straight to the
     parent initializer, ``n_crossfit_splits`` is passed as ``n_splits``,
     and no featurizer is used. ``random_state`` is given to both the
     forest and the parent.
     """
     forest_options = dict(
         n_estimators=n_estimators,
         criterion=criterion,
         max_depth=max_depth,
         min_samples_split=min_samples_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         max_features=max_features,
         max_leaf_nodes=max_leaf_nodes,
         min_impurity_decrease=min_impurity_decrease,
         subsample_fr=subsample_fr,
         honest=honest,
         n_jobs=n_jobs,
         random_state=random_state,
         verbose=verbose,
     )
     super().__init__(
         model_y=model_y,
         model_t=model_t,
         model_final=SubsampledHonestForest(**forest_options),
         featurizer=None,
         discrete_treatment=discrete_treatment,
         n_splits=n_crossfit_splits,
         random_state=random_state,
     )