Example #1
    def test_drlearner(self):
        y, T, X, W = self._get_data()

        for est in [LinearDRLearner(random_state=123),
                    SparseLinearDRLearner(random_state=123)]:
            est.fit(y, T, X=X, W=W, cache_values=True)
            np.testing.assert_equal(est.model_regression, 'auto')
            est.model_regression = LinearRegression()
            est.model_propensity = LogisticRegression()
            est.fit(y, T, X=X, W=W, cache_values=True)
            assert isinstance(est.model_regression, LinearRegression)
            with pytest.raises(ValueError):
                est.multitask_model_final = True
            with pytest.raises(ValueError):
                est.model_final = LinearRegression()
            est.min_propensity = .1
            est.mc_iters = 2
            est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
            est.refit_final()
            assert isinstance(est.featurizer_, PolynomialFeatures)
            np.testing.assert_equal(est.mc_iters, 2)
            intcpt = est.intercept_(T=1)
            est.fit_cate_intercept = False
            # setting fit_cate_intercept takes effect only after refit_final, so the cached intercept is unchanged
            np.testing.assert_equal(est.intercept_(T=1), intcpt)
            est.refit_final()
            # after refitting without a CATE intercept, intercept_ is no longer available
            with pytest.raises(AttributeError):
                est.intercept_(T=1)
            est.fit(y, T, X=X, W=W, cache_values=False)
            # refit_final requires cached first-stage values, so it now fails
            with pytest.raises(AssertionError):
                est.refit_final()
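The test above exercises EconML's cache_values/refit_final workflow: nuisance values are cached on fit, final-stage settings can then be changed and refit cheaply. For orientation, here is a minimal, self-contained sketch of that workflow on synthetic data; the data-generating process and parameter choices are illustrative assumptions (not the test fixture), and it assumes a recent econml where these classes live in econml.dr.

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from econml.dr import LinearDRLearner

# Illustrative synthetic data (not the test fixture)
rng = np.random.default_rng(123)
X = rng.normal(size=(500, 3))
W = rng.normal(size=(500, 2))
T = rng.binomial(1, 0.5, size=500)
y = T * X[:, 0] + W[:, 0] + rng.normal(scale=0.5, size=500)

est = LinearDRLearner(random_state=123)
est.fit(y, T, X=X, W=W, cache_values=True)      # cache first-stage nuisance values
print(est.intercept_(T=1))

# Changing only final-stage settings allows a cheap refit on the cached values
est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
est.refit_final()
print(est.intercept_(T=1))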
Example #2
 def test_drlearners(self):
     X = TestPandasIntegration.df[TestPandasIntegration.features]
     W = TestPandasIntegration.df[TestPandasIntegration.controls]
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
     T = TestPandasIntegration.df[TestPandasIntegration.bin_treat]
     # Test LinearDRLearner
     est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                           model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(T=1))
     self._check_popsum_names(est.effect_inference(X).population_summary())
     # Test SparseLinearDRLearner
     est = SparseLinearDRLearner(
         model_propensity=GradientBoostingClassifier(),
         model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(T=1))
     self._check_popsum_names(est.effect_inference(X).population_summary())
     # Test ForestDRLearner
     est = ForestDRLearner(model_propensity=GradientBoostingClassifier(),
                           model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='blb')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_popsum_names(est.effect_inference(X).population_summary())
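The checks above rely on EconML's pandas integration: when Y, T, X and W are passed as named DataFrame/Series objects, the column names are carried through to summary tables and inference output. A minimal sketch of that workflow follows; the DataFrame and column names below are illustrative assumptions, not the test fixture.

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from econml.dr import LinearDRLearner

rng = np.random.default_rng(0)
n = 300
df = pd.DataFrame({'x1': rng.normal(size=n),
                   'x2': rng.normal(size=n),
                   'w1': rng.normal(size=n)})
df['t'] = rng.binomial(1, 0.5, size=n)
df['y'] = df['t'] * df['x1'] + df['w1'] + rng.normal(scale=0.5, size=n)

est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                      model_regression=GradientBoostingRegressor())
est.fit(df['y'], df['t'], X=df[['x1', 'x2']], W=df[['w1']], inference='statsmodels')
effects = est.effect(df[['x1', 'x2']])
lb, ub = est.effect_interval(df[['x1', 'x2']], alpha=0.05)
print(est.summary(T=1))   # coefficient table is labeled with 'x1', 'x2'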
Example #3
 def test_sparse(self):
     """SparseDRLearner test with a sparse DGP"""
     # Sparse DGP
     np.random.seed(123)
     n_x = 50
     n_nonzero = 1
     n_w = 5
     n = 1000
     # Treatment effect coef
     a = np.zeros(n_x)
     nonzero_idx = np.random.choice(n_x, size=n_nonzero, replace=False)
     a[nonzero_idx] = 1
     # Other coefs
     b = np.zeros(n_x + n_w)
     g = np.zeros(n_x + n_w)
     b_nonzero = np.random.choice(n_x + n_w, size=n_nonzero, replace=False)
     g_nonzero = np.random.choice(n_x + n_w, size=n_nonzero, replace=False)
     b[b_nonzero] = 1
     g[g_nonzero] = 1
     # Features and controls
     x = np.random.normal(size=(n, n_x))
     w = np.random.normal(size=(n, n_w))
     xw = np.hstack([x, w])
     T = np.random.binomial(1, scipy.special.expit(xw @ b))
     err_Y = np.random.normal(size=n, scale=0.5)
     Y = T * (x @ a) + xw @ g + err_Y
     # Test sparse estimator
     # --> test coef_, intercept_
     sparse_dml = SparseLinearDRLearner(featurizer=FunctionTransformer())
     sparse_dml.fit(Y, T, X=x, W=w, inference='debiasedlasso')
     np.testing.assert_allclose(a, sparse_dml.coef_(T=1), atol=2e-1)
     np.testing.assert_allclose(sparse_dml.intercept_(T=1), 0, atol=2e-1)
     # --> test treatment effects
     # Test points drawn uniformly from [0, 1)
     x_test = np.random.uniform(size=(10, n_x))
     true_eff = (x_test @ a)
     eff = sparse_dml.effect(x_test, T0=0, T1=1)
     np.testing.assert_allclose(true_eff, eff, atol=0.5)
     # --> check inference
     y_lower, y_upper = sparse_dml.effect_interval(x_test, T0=0, T1=1)
     in_CI = ((y_lower < true_eff) & (true_eff < y_upper))
     # Check that most (>80%) of the true effects lie in the confidence interval
     self.assertTrue(in_CI.mean() > 0.8)
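Continuing from the fitted sparse_dml and the DGP variables above, a brief illustrative sketch of how one might inspect which features the debiased-lasso final stage picks out as effect modifiers; the top-k inspection below is an illustration, not part of the test.

import numpy as np

coefs = sparse_dml.coef_(T=1)                  # per-feature CATE coefficients
top = np.argsort(np.abs(coefs))[::-1][:3]      # indices of the largest coefficients
print("largest estimated coefficients:", list(zip(top, coefs[top])))
print("true nonzero index:", nonzero_idx)      # should appear among the top indices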
Example #4
 def test_dr_random_state(self):
     Y, T, X, W, X_test = self._make_data(500, 2)
     for est in [
             DRLearner(model_final=RandomForestRegressor(
                 max_depth=3,
                 n_estimators=10,
                 min_samples_leaf=100,
                 bootstrap=True,
                 random_state=123),
                       cv=2,
                       random_state=123),
             LinearDRLearner(random_state=123),
             SparseLinearDRLearner(cv=2, random_state=123),
             ForestDRLearner(
                 model_regression=RandomForestRegressor(n_estimators=10,
                                                        max_depth=4,
                                                        random_state=123),
                 model_propensity=RandomForestClassifier(n_estimators=10,
                                                         max_depth=4,
                                                         random_state=123),
                 cv=2,
                 random_state=123)
     ]:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
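The helper TestRandomState._test_random_state verifies that a fixed random_state makes results reproducible across repeated fits. Below is a minimal sketch of that property on synthetic data; the data and seed choices are illustrative, and it assumes the estimator propagates random_state to its cross-fitting splits and default nuisance models, which is exactly what the test checks.

import numpy as np
from econml.dr import LinearDRLearner

rng = np.random.default_rng(0)
X = rng.normal(size=(400, 3))
W = rng.normal(size=(400, 2))
T = rng.binomial(1, 0.5, size=400)
Y = T * X[:, 0] + W[:, 0] + rng.normal(scale=0.5, size=400)

effects = []
for _ in range(2):
    est = LinearDRLearner(random_state=123)    # same seed on both fits
    est.fit(Y, T, X=X, W=W)
    effects.append(est.effect(X))

# With a fixed random_state the two fits should produce identical estimates
np.testing.assert_allclose(effects[0], effects[1])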