def test_dml(self): ################################# # Single treatment and outcome # ################################# X = TestPandasIntegration.df[TestPandasIntegration.features] W = TestPandasIntegration.df[TestPandasIntegration.controls] Y = TestPandasIntegration.df[TestPandasIntegration.outcome] T = TestPandasIntegration.df[TestPandasIntegration.cont_treat] # Test LinearDML est = LinearDML(model_y=LassoCV(), model_t=LassoCV()) est.fit(Y, T, X=X, W=W, inference='statsmodels') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names( est.summary()) # Check that names propagate as expected # Test re-fit X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns}) est.fit(Y, T, X=X1, W=W, inference='statsmodels') self._check_input_names(est.summary(), feat_comp=X1.columns) # Test SparseLinearDML est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV()) est.fit(Y, T, X=X, W=W, inference='debiasedlasso') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names( est.summary()) # Check that names propagate as expected # ForestDML est = ForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor()) est.fit(Y, T, X=X, W=W, inference='blb') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) #################################### # Mutiple treatments and outcomes # #################################### Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi] T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi] # Test LinearDML est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso()) est.fit(Y, T, X=X, W=W, inference='statsmodels') self._check_input_names(est.summary(), True, True) # Check that names propagate as expected self._check_popsum_names( est.effect_inference(X).population_summary(), True) est.fit(Y, T, X=X, W=W, inference='bootstrap') # Check bootstrap as well self._check_input_names(est.summary(), True, True) self._check_popsum_names( est.effect_inference(X).population_summary(), True) # Test SparseLinearDML est = SparseLinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso()) est.fit(Y, T, X=X, W=W, inference='debiasedlasso') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names(est.summary(), True, True) # Check that names propagate as expected self._check_popsum_names( est.effect_inference(X).population_summary(), True)
def test_all_kinds(self): T = [1, 0, 1, 2, 0, 2] * 5 Y = [1, 2, 3, 4, 5, 6] * 5 X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1) est = LinearDML(n_splits=2) for kind in ['percentile', 'pivot', 'normal']: with self.subTest(kind=kind): inference = BootstrapInference(n_bootstrap_samples=5, bootstrap_type=kind) est.fit(Y, T, inference=inference) i = est.const_marginal_effect_interval() inf = est.const_marginal_effect_inference() assert i[0].shape == i[1].shape == inf.point_estimate.shape assert np.allclose(i[0], inf.conf_int()[0]) assert np.allclose(i[1], inf.conf_int()[1]) est.fit(Y, T, X=X, inference=inference) i = est.const_marginal_effect_interval(X) inf = est.const_marginal_effect_inference(X) assert i[0].shape == i[1].shape == inf.point_estimate.shape assert np.allclose(i[0], inf.conf_int()[0]) assert np.allclose(i[1], inf.conf_int()[1]) i = est.coef__interval() inf = est.coef__inference() assert i[0].shape == i[1].shape == inf.point_estimate.shape assert np.allclose(i[0], inf.conf_int()[0]) assert np.allclose(i[1], inf.conf_int()[1]) i = est.effect_interval(X) inf = est.effect_inference(X) assert i[0].shape == i[1].shape == inf.point_estimate.shape assert np.allclose(i[0], inf.conf_int()[0]) assert np.allclose(i[1], inf.conf_int()[1])
def test_refit_final_inference(self): """Test that we can perform inference during refit_final""" est = LinearDML(linear_first_stages=False, featurizer=PolynomialFeatures(1, include_bias=False)) X = np.random.choice(np.arange(5), size=(500, 3)) y = np.random.normal(size=(500,)) T = np.random.choice(np.arange(3), size=(500, 2)) W = np.random.normal(size=(500, 2)) est.fit(y, T, X=X, W=W, cache_values=True, inference='statsmodels') assert isinstance(est.effect_inference(X), NormalInferenceResults) with pytest.raises(ValueError): est.refit_final(inference=BootstrapInference(2))
def test_dml(self): ################################# # Single treatment and outcome # ################################# X = TestPandasIntegration.df[TestPandasIntegration.features] W = TestPandasIntegration.df[TestPandasIntegration.controls] Y = TestPandasIntegration.df[TestPandasIntegration.outcome] T = TestPandasIntegration.df[TestPandasIntegration.cont_treat] # Test LinearDML est = LinearDML(model_y=LassoCV(), model_t=LassoCV()) est.fit(Y, T, X=X, W=W, inference='statsmodels') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names( est.summary()) # Check that names propagate as expected # |--> Test featurizers est.featurizer = PolynomialFeatures(degree=2, include_bias=False) est.fit(Y, T, X=X, W=W, inference='statsmodels') self._check_input_names( est.summary(), feat_comp=est.original_featurizer.get_feature_names(X.columns)) est.featurizer = FunctionTransformer() est.fit(Y, T, X=X, W=W, inference='statsmodels') self._check_input_names( est.summary(), feat_comp=[ f"feat(X){i}" for i in range(TestPandasIntegration.n_features) ]) est.featurizer = ColumnTransformer([('passthrough', 'passthrough', [0]) ]) est.fit(Y, T, X=X, W=W, inference='statsmodels') # ColumnTransformer doesn't propagate column names self._check_input_names(est.summary(), feat_comp=["x0"]) # |--> Test re-fit est.featurizer = None X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns}) est.fit(Y, T, X=X1, W=W, inference='statsmodels') self._check_input_names(est.summary(), feat_comp=X1.columns) # Test SparseLinearDML est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV()) est.fit(Y, T, X=X, W=W, inference='debiasedlasso') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names( est.summary()) # Check that names propagate as expected # Test ForestDML est = ForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor()) est.fit(Y, T, X=X, W=W, inference='blb') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) #################################### # Mutiple treatments and outcomes # #################################### Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi] T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi] # Test LinearDML est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso()) est.fit(Y, T, X=X, W=W, inference='statsmodels') self._check_input_names(est.summary(), True, True) # Check that names propagate as expected self._check_popsum_names( est.effect_inference(X).population_summary(), True) est.fit(Y, T, X=X, W=W, inference='bootstrap') # Check bootstrap as well self._check_input_names(est.summary(), True, True) self._check_popsum_names( est.effect_inference(X).population_summary(), True) # Test SparseLinearDML est = SparseLinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso()) est.fit(Y, T, X=X, W=W, inference='debiasedlasso') treatment_effects = est.effect(X) lb, ub = est.effect_interval(X, alpha=0.05) self._check_input_names(est.summary(), True, True) # Check that names propagate as expected self._check_popsum_names( est.effect_inference(X).population_summary(), True)