def test_drlearner(self):
    y, T, X, W = self._get_data()

    for est in [LinearDRLearner(random_state=123),
                SparseLinearDRLearner(random_state=123)]:
        est.fit(y, T, X=X, W=W, cache_values=True)
        np.testing.assert_equal(est.model_regression, 'auto')
        est.model_regression = LinearRegression()
        est.model_propensity = LogisticRegression()
        est.fit(y, T, X=X, W=W, cache_values=True)
        assert isinstance(est.model_regression, LinearRegression)
        # The final model is fixed for these estimators, so assigning
        # model_final or multitask_model_final should raise
        with pytest.raises(ValueError):
            est.multitask_model_final = True
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.min_propensity = .1
        est.mc_iters = 2
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        assert isinstance(est.featurizer_, PolynomialFeatures)
        np.testing.assert_equal(est.mc_iters, 2)
        intcpt = est.intercept_(T=1)
        # Changing fit_cate_intercept has no effect until refit_final is called
        est.fit_cate_intercept = False
        np.testing.assert_equal(est.intercept_(T=1), intcpt)
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_(T=1)
        # Without cached values, refitting the final model must fail
        est.fit(y, T, X=X, W=W, cache_values=False)
        with pytest.raises(AssertionError):
            est.refit_final()
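
# NOTE: a minimal sketch of the `_get_data` fixture assumed by the test
# above (hypothetical; the real helper may differ): a simple DGP with a
# binary treatment, a heterogeneous effect in X, and confounders W.
def _get_data_sketch():
    np.random.seed(123)
    n, d_x, d_w = 500, 2, 3
    X = np.random.normal(size=(n, d_x))
    W = np.random.normal(size=(n, d_w))
    # Treatment probability depends on the first confounder
    T = np.random.binomial(1, scipy.special.expit(W[:, 0]))
    # Effect of T on y varies with the first feature
    y = T * (1 + X[:, 0]) + W[:, 0] + np.random.normal(scale=0.5, size=(n,))
    return y, T, X, W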

def test_drlearners(self):
    X = TestPandasIntegration.df[TestPandasIntegration.features]
    W = TestPandasIntegration.df[TestPandasIntegration.controls]
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
    T = TestPandasIntegration.df[TestPandasIntegration.bin_treat]
    # Test LinearDRLearner
    est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                          model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(T=1))
    self._check_popsum_names(est.effect_inference(X).population_summary())
    # Test SparseLinearDRLearner
    est = SparseLinearDRLearner(model_propensity=GradientBoostingClassifier(),
                                model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(T=1))
    self._check_popsum_names(est.effect_inference(X).population_summary())
    # Test ForestDRLearner
    est = ForestDRLearner(model_propensity=GradientBoostingClassifier(),
                          model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='blb')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_popsum_names(est.effect_inference(X).population_summary())
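
# NOTE: minimal sketches of the two name-checking helpers used above
# (hypothetical implementations; the real fixtures may differ). The
# intent is that pandas column names survive into the summary output.
def _check_input_names_sketch(summary):
    # The coefficient table's row stubs should mention the original
    # DataFrame feature names
    stubs = [row[0] for row in summary.tables[0].data[1:]]
    for name in TestPandasIntegration.features:
        assert any(name in stub for stub in stubs)

def _check_popsum_names_sketch(popsum):
    # The population summary should carry the outcome name through
    np.testing.assert_array_equal(popsum.output_names,
                                  TestPandasIntegration.outcome)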

def test_sparse(self):
    """SparseLinearDRLearner test with a sparse DGP."""
    # Sparse DGP
    np.random.seed(123)
    n_x = 50
    n_nonzero = 1
    n_w = 5
    n = 1000
    # Treatment effect coef
    a = np.zeros(n_x)
    nonzero_idx = np.random.choice(n_x, size=n_nonzero, replace=False)
    a[nonzero_idx] = 1
    # Other coefs
    b = np.zeros(n_x + n_w)
    g = np.zeros(n_x + n_w)
    b_nonzero = np.random.choice(n_x + n_w, size=n_nonzero, replace=False)
    g_nonzero = np.random.choice(n_x + n_w, size=n_nonzero, replace=False)
    b[b_nonzero] = 1
    g[g_nonzero] = 1
    # Features and controls
    x = np.random.normal(size=(n, n_x))
    w = np.random.normal(size=(n, n_w))
    xw = np.hstack([x, w])
    T = np.random.binomial(1, scipy.special.expit(xw @ b))
    err_Y = np.random.normal(size=n, scale=0.5)
    Y = T * (x @ a) + xw @ g + err_Y
    # Test sparse estimator
    # --> test coef_, intercept_
    sparse_dr = SparseLinearDRLearner(featurizer=FunctionTransformer())
    sparse_dr.fit(Y, T, X=x, W=w, inference='debiasedlasso')
    np.testing.assert_allclose(a, sparse_dr.coef_(T=1), atol=2e-1)
    np.testing.assert_allclose(sparse_dr.intercept_(T=1), 0, atol=2e-1)
    # --> test treatment effects
    # Sample test points with entries in [0, 1)
    x_test = np.random.uniform(size=(10, n_x))
    true_eff = x_test @ a
    eff = sparse_dr.effect(x_test, T0=0, T1=1)
    np.testing.assert_allclose(true_eff, eff, atol=0.5)
    # --> check inference
    y_lower, y_upper = sparse_dr.effect_interval(x_test, T0=0, T1=1)
    in_CI = ((y_lower < true_eff) & (true_eff < y_upper))
    # Check that a large majority of the true effects lie in the 95% CI
    self.assertTrue(in_CI.mean() > 0.8)
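
# DGP recap for test_sparse: with xw = [x, w],
#     T ~ Bernoulli(expit(xw @ b)),    Y = T * (x @ a) + xw @ g + eps,
# so the true CATE is tau(x) = x @ a, which is 1-sparse by construction;
# the debiased lasso final stage should therefore recover `a` up to the
# stated tolerance.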

def test_dr_random_state(self):
    Y, T, X, W, X_test = self._make_data(500, 2)

    for est in [DRLearner(model_final=RandomForestRegressor(max_depth=3,
                                                            n_estimators=10,
                                                            min_samples_leaf=100,
                                                            bootstrap=True,
                                                            random_state=123),
                          cv=2, random_state=123),
                LinearDRLearner(random_state=123),
                SparseLinearDRLearner(cv=2, random_state=123),
                ForestDRLearner(model_regression=RandomForestRegressor(n_estimators=10,
                                                                       max_depth=4,
                                                                       random_state=123),
                                model_propensity=RandomForestClassifier(n_estimators=10,
                                                                        max_depth=4,
                                                                        random_state=123),
                                cv=2, random_state=123)]:
        TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
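
# NOTE: hypothetical sketches of the fixtures assumed above; the real
# `_make_data` and `TestRandomState._test_random_state` helpers may differ.
def _make_data_sketch(n, d_x):
    # Simple confounded DGP plus a held-out X grid for effect comparison
    np.random.seed(123)
    X = np.random.normal(size=(n, d_x))
    W = np.random.normal(size=(n, 3))
    T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
    Y = T * (1 + X[:, 0]) + W[:, 0] + np.random.normal(size=(n,))
    return Y, T, X, W, np.random.normal(size=(100, d_x))

def _test_random_state_sketch(est, X_test, Y, T, **kwargs):
    # Fitting twice with the same random_state should reproduce the
    # same effect estimates exactly
    est.fit(Y, T, **kwargs)
    te1 = est.effect(X_test)
    est.fit(Y, T, **kwargs)
    np.testing.assert_array_equal(te1, est.effect(X_test))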