def test_can_use_interpreters(self):
    """Interpreters should work for every combination of 1D/2D treatment and outcome shapes."""
    n = 100
    for t_shape in [(n, ), (n, 1)]:
        for y_shape in [(n, ), (n, 1)]:
            X = np.random.normal(size=(n, 4))
            T = np.random.binomial(1, 0.5, size=t_shape)
            # outcome depends on treatment with sign determined by X[:, 0]
            Y = (T.flatten() * (2 * (X[:, 0] > 0) - 1)).reshape(y_shape)
            est = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                            discrete_treatment=True)
            est.fit(Y, T, X=X)
            for intrp in [SingleTreeCateInterpreter(), SingleTreePolicyInterpreter()]:
                with self.subTest(t_shape=t_shape, y_shape=y_shape, intrp=intrp):
                    with self.assertRaises(Exception):
                        # prior to calling interpret, can't plot, render, etc.
                        intrp.plot()
                    intrp.interpret(est, X)
                    intrp.plot()
                    intrp.render('tmp.pdf', view=False)
                    intrp.export_graphviz()
def test_can_assign_treatment(self):
    """Policy interpreter should assign treatments matching T's shape and validate cost shapes."""
    n = 100
    X = np.random.normal(size=(n, 4))
    T = np.random.binomial(1, 0.5, size=(n, ))
    Y = (2 * (X[:, 0] > 0) - 1) * T.flatten()
    est = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                    discrete_treatment=True)
    est.fit(Y, T, X=X)
    # can interpret without uncertainty
    intrp = SingleTreePolicyInterpreter()
    with self.assertRaises(Exception):
        # can't treat before interpreting
        intrp.treat(X)
    intrp.interpret(est, X)
    T_policy = intrp.treat(X)
    assert T.shape == T_policy.shape
    # per-sample treatment costs with a single cost column are accepted
    intrp.interpret(est, X, sample_treatment_costs=np.ones((T.shape[0], 1)))
    T_policy = intrp.treat(X)
    assert T.shape == T_policy.shape
    # a cost array with the wrong number of columns must be rejected
    with np.testing.assert_raises(ValueError):
        intrp.interpret(est, X, sample_treatment_costs=np.ones((T.shape[0], 2)))
def test_isolate_inferenceresult_from_estimator(self):
    """Mutating an inference result must not leak back into the estimator's coefficients."""
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
    estimator = LinearDML().fit(Y, T, X=X, W=W)
    coef_before = estimator.coef_
    results = estimator.coef__inference()
    # tamper with the inference result's internal prediction array
    results.pred[0] = .5
    coef_after = estimator.coef_
    # the estimator's coefficients must be unchanged by the tampering
    np.testing.assert_array_equal(coef_before, coef_after)
def test_discrete_treatment(self):
    """Test that we can perform nuisance averaging, and that it reduces the variance in a simple example."""
    # outcome: noise plus a deterministic alternating offset
    y = np.random.normal(size=30) + [0, 1] * 15
    T = np.random.binomial(1, .5, size=(30, ))
    W = np.random.normal(size=(30, 3))
    # est1: single split; est2/est3: 2 monte carlo iterations (mean / median aggregation)
    est1 = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                     discrete_treatment=True)
    est2 = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                     discrete_treatment=True, mc_iters=2)
    est3 = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                     discrete_treatment=True, mc_iters=2, mc_agg='median')
    # Run ten experiments, recomputing the variance of 10 estimates of the effect in each experiment
    v1s = [np.var([est1.fit(y, T, W=W).effect() for _ in range(10)]) for _ in range(10)]
    v2s = [np.var([est2.fit(y, T, W=W).effect() for _ in range(10)]) for _ in range(10)]
    v3s = [np.var([est3.fit(y, T, W=W).effect() for _ in range(10)]) for _ in range(10)]
    # The average variance should be lower when using monte carlo iterations
    assert np.mean(v2s) < np.mean(v1s)
    assert np.mean(v3s) < np.mean(v1s)
def test_translate(self):
    """Translating inference results should shift the point estimates and bounds by the offset.

    Fixes the typo in the original method name (``test_translte``); test discovery is
    name-based, so no callers are affected.
    """
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
    # offset may be a scalar or a per-row pandas Series
    for offset in [10, pd.Series(np.arange(TestInference.X.shape[0]))]:
        for inf in ['auto', BootstrapInference(n_bootstrap_samples=5)]:
            est = LinearDML().fit(Y, T, X=X, W=W, inference=inf)
            inf = est.const_marginal_effect_inference(X)
            pred, bounds, summary = inf.point_estimate, inf.conf_int(), inf.summary_frame()
            inf.translate(offset)
            pred2, bounds2, summary2 = inf.point_estimate, inf.conf_int(), inf.summary_frame()
            # point estimates and both confidence bounds shift by exactly the offset
            np.testing.assert_array_equal(pred + offset, pred2)
            np.testing.assert_array_almost_equal(bounds[0] + offset, bounds2[0])
            np.testing.assert_array_almost_equal(bounds[1] + offset, bounds2[1])
def test_can_assign_treatment(self):
    """treat() must fail before interpret() and return treatments with T's shape afterwards."""
    n = 100
    X = np.random.normal(size=(n, 4))
    T = np.random.binomial(1, 0.5, size=(n,))
    Y = np.random.normal(size=(n,))
    est = LinearDML(discrete_treatment=True)
    est.fit(Y, T, X=X)
    # can interpret without uncertainty
    intrp = SingleTreePolicyInterpreter()
    with self.assertRaises(Exception):
        # can't treat before interpreting
        intrp.treat(X)
    intrp.interpret(est, X)
    T_policy = intrp.treat(X)
    assert T.shape == T_policy.shape
def test_with_econml(self):
    """Test that we can bootstrap econml estimators."""
    x = np.random.normal(size=(1000, 2))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))
    est = LinearDML(model_y=LinearRegression(), model_t=LinearRegression())
    est.fit(y, t, X=x)
    bs = BootstrapEstimator(est, 50)
    # test that we can fit with the same arguments as the base estimator
    bs.fit(y, t, X=x)
    # test that we can get the same attribute for the bootstrap as the original, with the same shape
    self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_))
    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    lower, upper = bs.coef__interval()
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.coef_), np.shape(bound))
    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
    # test that we can do the same thing once we provide percentile bounds
    lower, upper = bs.coef__interval(lower=10, upper=90)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.coef_), np.shape(bound))
    # test that we can do the same thing with the results of a method, rather than an attribute
    self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bs.effect(x, T0=t, T1=t2)))
    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    lower, upper = bs.effect_interval(x, T0=t, T1=t2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bound))
    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
    # test that we can do the same thing once we provide percentile bounds
    lower, upper = bs.effect_interval(x, T0=t, T1=t2, lower=10, upper=90)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bound))
    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
def test_dowhy(self):
    """Smoke-test the dowhy wrapper (fit, view_model, refute_estimate) for one estimator per category."""
    def reg():
        return LinearRegression()

    def clf():
        return LogisticRegression()

    Y, T, X, W, Z = self._get_data()
    # test at least one estimator from each category
    models = {"dml": LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
                               linear_first_stages=False),
              "dr": DRLearner(model_propensity=clf(), model_regression=reg(), model_final=reg()),
              "forestdr": ForestDRLearner(model_propensity=clf(), model_regression=reg()),
              "xlearner": XLearner(models=reg(), cate_models=reg(), propensity_model=clf()),
              "cfdml": CausalForestDML(model_y=reg(), model_t=clf(), discrete_treatment=True),
              "orf": DROrthoForest(n_trees=10, propensity_model=clf(), model_Y=reg()),
              "orthoiv": OrthoIV(model_y_xw=reg(), model_t_xw=clf(), model_z_xw=reg(),
                                 discrete_treatment=True, discrete_instrument=False),
              "dmliv": DMLIV(fit_cate_intercept=True, discrete_treatment=True,
                             discrete_instrument=False),
              "driv": LinearDRIV(flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                                 fit_cate_intercept=True, discrete_instrument=False,
                                 discrete_treatment=True)}
    for name, model in models.items():
        with self.subTest(name=name):
            est = model
            # xlearner takes no W; IV estimators additionally need the instrument Z
            if name == "xlearner":
                est_dowhy = est.dowhy.fit(Y, T, X=np.hstack((X, W)), W=None)
            elif name in ["orthoiv", "dmliv", "driv"]:
                est_dowhy = est.dowhy.fit(Y, T, Z=Z, X=X, W=W)
            else:
                est_dowhy = est.dowhy.fit(Y, T, X=X, W=W)
            # test causal graph
            est_dowhy.view_model()
            # test refutation estimate
            est_dowhy.refute_estimate(method_name="random_common_cause")
            if name != "orf":
                est_dowhy.refute_estimate(method_name="add_unobserved_common_cause",
                                          confounders_effect_on_treatment="binary_flip",
                                          confounders_effect_on_outcome="linear",
                                          effect_strength_on_treatment=0.1,
                                          effect_strength_on_outcome=0.1,)
                est_dowhy.refute_estimate(method_name="placebo_treatment_refuter",
                                          placebo_type="permute", num_simulations=3)
                est_dowhy.refute_estimate(method_name="data_subset_refuter",
                                          subset_fraction=0.8, num_simulations=3)
def test_can_summarize(self):
    """Smoke-test summary() for a continuous-treatment and a discrete-treatment estimator."""
    LinearDML(model_t=LinearRegression(), model_y=LinearRegression()).fit(
        TestInference.Y, TestInference.T, TestInference.X, TestInference.W).summary()
    # discrete-treatment learner: summary(1) summarizes the effect of treatment category 1
    LinearDRLearner(model_regression=LinearRegression(),
                    model_propensity=LogisticRegression(),
                    fit_cate_intercept=False).fit(
        TestInference.Y, TestInference.T > 0, TestInference.X, TestInference.W,
        inference=BootstrapInference(5)).summary(1)
def test_alpha(self):
    """Check that the `alpha` argument reaches the underlying interval computation."""
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
    estimator = LinearDML(model_y=LinearRegression(), model_t=LinearRegression())
    estimator.fit(Y, T, X=X, W=W)
    # alpha=1 collapses the interval to zero width, so it must have been passed through
    lower, upper = estimator.const_marginal_ate_interval(X, alpha=1)
    assert (lower == upper).all()
    # with the default alpha the interval is non-degenerate everywhere
    lower, upper = estimator.const_marginal_ate_interval(X)
    assert (lower != upper).all()
def test_refit_final_inference(self):
    """Test that we can perform inference during refit_final"""
    est = LinearDML(linear_first_stages=False,
                    featurizer=PolynomialFeatures(1, include_bias=False))
    X = np.random.choice(np.arange(5), size=(500, 3))
    y = np.random.normal(size=(500,))
    T = np.random.choice(np.arange(3), size=(500, 2))
    W = np.random.normal(size=(500, 2))
    # cache_values=True is required so refit_final can reuse the first-stage residuals
    est.fit(y, T, X=X, W=W, cache_values=True, inference='statsmodels')
    assert isinstance(est.effect_inference(X), NormalInferenceResults)
    # bootstrap inference is not supported by refit_final
    with pytest.raises(ValueError):
        est.refit_final(inference=BootstrapInference(2))
def test_dml_random_state(self):
    """All DML variants should produce reproducible results for a fixed random_state."""
    Y, T, X, W, X_test = TestRandomState._make_data(500, 2)
    for est in [NonParamDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4,
                                                          random_state=123),
                            model_t=RandomForestClassifier(n_estimators=10, max_depth=4,
                                                           random_state=123),
                            model_final=RandomForestRegressor(max_depth=3, n_estimators=10,
                                                              min_samples_leaf=100,
                                                              bootstrap=True, random_state=123),
                            discrete_treatment=True, n_splits=2, random_state=123),
                CausalForestDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4,
                                                              random_state=123),
                                model_t=RandomForestClassifier(n_estimators=10, max_depth=4,
                                                               random_state=123),
                                n_estimators=8, discrete_treatment=True, cv=2, random_state=123),
                LinearDML(model_y=RandomForestRegressor(n_estimators=10, max_depth=4,
                                                        random_state=123),
                          model_t=RandomForestClassifier(n_estimators=10, max_depth=4,
                                                         random_state=123),
                          discrete_treatment=True, n_splits=2, random_state=123),
                SparseLinearDML(discrete_treatment=True, n_splits=2, random_state=123),
                KernelDML(discrete_treatment=True, n_splits=2, random_state=123)]:
        TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
def dml(outcome, treatment, data, method='GBR'):
    """Estimate the average treatment effect of a binary treatment via double ML.

    Parameters
    ----------
    outcome : str
        Name of the outcome column in `data`.
    treatment : str
        Name of the (discrete) treatment column in `data`.
    data : pandas.DataFrame
        Data frame containing the outcome, treatment and feature columns.
    method : str, default 'GBR'
        'GBR' for gradient-boosted first/final stages, 'linear' for LinearDML.

    Returns
    -------
    float
        The estimated ATE of moving the treatment from 0 to 1.

    Raises
    ------
    ValueError
        If `method` is neither 'GBR' nor 'linear'.  (The original fell through
        and raised UnboundLocalError on `point` instead.)
    """
    # hoist: the same feature matrix serves as X, W and the evaluation set
    features = data.drop(columns=[outcome, treatment])
    if method == 'GBR':
        est = NonParamDML(model_y=GradientBoostingRegressor(),
                          model_t=GradientBoostingClassifier(),
                          model_final=GradientBoostingRegressor(),
                          discrete_treatment=True)
    elif method == 'linear':
        est = LinearDML(discrete_treatment=True)
    else:
        raise ValueError("Unknown method {!r}; expected 'GBR' or 'linear'".format(method))
    est.fit(data[outcome], data[treatment], X=features, W=features)
    return est.ate(features, T0=0, T1=1)
def test_mean_pred_stderr(self):
    """Test that mean_pred_stderr is not None when estimator's final stage is linear"""
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
    ests = [LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                      featurizer=PolynomialFeatures(degree=2, include_bias=False)),
            LinearDRLearner(model_regression=LinearRegression(),
                            model_propensity=LogisticRegression(),
                            featurizer=PolynomialFeatures(degree=2, include_bias=False))]
    for est in ests:
        est.fit(Y, T, X=X, W=W)
        assert est.const_marginal_effect_inference(X).population_summary().mean_pred_stderr is not None
        # only is not None when T1 is a constant or a list of constant
        assert est.effect_inference(X).population_summary().mean_pred_stderr is not None
        # coefficient inference carries no mean_pred_stderr for either estimator
        if est.__class__.__name__ == "LinearDRLearner":
            assert est.coef__inference(T=1).mean_pred_stderr is None
        else:
            assert est.coef__inference().mean_pred_stderr is None
def test_cate_uncertainty_needs_inference(self):
    """Interpreting with model uncertainty requires the estimator to be fit with inference."""
    n = 100
    X = np.random.normal(size=(n, 4))
    T = np.random.binomial(1, 0.5, size=(n,))
    Y = np.random.normal(size=(n,))
    est = LinearDML(discrete_treatment=True)
    est.fit(Y, T, X=X, inference=None)
    # can interpret without uncertainty
    intrp = SingleTreeCateInterpreter()
    intrp.interpret(est, X)
    intrp = SingleTreeCateInterpreter(include_model_uncertainty=True)
    with self.assertRaises(Exception):
        # can't interpret with uncertainty if inference wasn't used during fit
        intrp.interpret(est, X)
    # can interpret with uncertainty if we refit
    est.fit(Y, T, X=X)
    intrp.interpret(est, X)
def test_internal(self):
    """Test that the internal use of bootstrap within an estimator works."""
    x = np.random.normal(size=(1000, 2))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))
    est = LinearDML(model_y=LinearRegression(), model_t=LinearRegression())
    est.fit(y, t, X=x, inference='bootstrap')
    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    eff = est.effect(x, T0=t, T1=t2)
    lower, upper = est.effect_interval(x, T0=t, T1=t2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(eff), np.shape(bound))
    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
    # test that the estimated effect is usually within the bounds
    assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.9
    # test that we can do the same thing once we provide alpha explicitly
    lower, upper = est.effect_interval(x, T0=t, T1=t2, alpha=0.2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(eff), np.shape(bound))
    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
    # test that the estimated effect is usually within the bounds
    assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.8
def test_all_kinds(self):
    """All bootstrap types should give intervals that agree with the inference objects."""
    T = [1, 0, 1, 2, 0, 2] * 5
    Y = [1, 2, 3, 4, 5, 6] * 5
    X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
    est = LinearDML(n_splits=2)
    for kind in ['percentile', 'pivot', 'normal']:
        with self.subTest(kind=kind):
            inference = BootstrapInference(n_bootstrap_samples=5, bootstrap_type=kind)
            # fit without X: marginal-effect intervals and inference must agree
            est.fit(Y, T, inference=inference)
            i = est.const_marginal_effect_interval()
            inf = est.const_marginal_effect_inference()
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])
            # fit with X: repeat the check for effects, coefficients, and per-sample effects
            est.fit(Y, T, X=X, inference=inference)
            i = est.const_marginal_effect_interval(X)
            inf = est.const_marginal_effect_inference(X)
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])
            i = est.coef__interval()
            inf = est.coef__inference()
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])
            i = est.effect_interval(X)
            inf = est.effect_inference(X)
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])
def test_stratify(self):
    """Test that we can properly stratify by treatment"""
    T = [1, 0, 1, 2, 0, 2]
    Y = [1, 2, 3, 4, 5, 6]
    X = np.array([1, 1, 2, 2, 1, 2]).reshape(-1, 1)
    est = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                    discrete_treatment=True)
    inference = BootstrapInference(n_bootstrap_samples=5)
    est.fit(Y, T, inference=inference)
    est.const_marginal_effect_interval()
    est.fit(Y, T, X=X, inference=inference)
    est.const_marginal_effect_interval(X)
    # test stratifying 2D treatment
    est.fit(Y, np.asarray(T).reshape(-1, 1), inference=inference)
    est.const_marginal_effect_interval()
def test_dml(self):
    """Column names from pandas inputs should propagate through fitting and summaries."""
    #################################
    # Single treatment and outcome  #
    #################################
    X = TestPandasIntegration.df[TestPandasIntegration.features]
    W = TestPandasIntegration.df[TestPandasIntegration.controls]
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
    T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
    # Test LinearDML
    est = LinearDML(model_y=LassoCV(), model_t=LassoCV())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary())  # Check that names propagate as expected
    # |--> Test featurizers
    est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    self._check_input_names(
        est.summary(),
        feat_comp=est.original_featurizer.get_feature_names(X.columns))
    est.featurizer = FunctionTransformer()
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    self._check_input_names(
        est.summary(),
        feat_comp=[f"feat(X){i}" for i in range(TestPandasIntegration.n_features)])
    est.featurizer = ColumnTransformer([('passthrough', 'passthrough', [0])])
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    # ColumnTransformer doesn't propagate column names
    self._check_input_names(est.summary(), feat_comp=["x0"])
    # |--> Test re-fit
    est.featurizer = None
    X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
    est.fit(Y, T, X=X1, W=W, inference='statsmodels')
    self._check_input_names(est.summary(), feat_comp=X1.columns)
    # Test SparseLinearDML
    est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary())  # Check that names propagate as expected
    # Test ForestDML
    est = ForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='blb')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    ####################################
    # Mutiple treatments and outcomes  #
    ####################################
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi]
    T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi]
    # Test LinearDML
    est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    self._check_input_names(est.summary(), True, True)  # Check that names propagate as expected
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
    est.fit(Y, T, X=X, W=W, inference='bootstrap')  # Check bootstrap as well
    self._check_input_names(est.summary(), True, True)
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
    # Test SparseLinearDML
    est = SparseLinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(), True, True)  # Check that names propagate as expected
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
def test_can_set_discrete_treatment(self):
    """The discrete_treatment flag should be switchable between fits on the same estimator."""
    X = np.random.choice(np.arange(5), size=(500, 3))
    y = np.random.normal(size=(500,))
    T = np.random.choice(np.arange(3), size=(500, 1))
    W = np.random.normal(size=(500, 2))
    est = LinearDML(model_y=RandomForestRegressor(),
                    model_t=RandomForestClassifier(min_samples_leaf=10),
                    discrete_treatment=True,
                    linear_first_stages=False,
                    cv=3)
    est.fit(y, T, X=X, W=W)
    est.effect(X)
    # flipping the flag and refitting should work (treating T as continuous)
    est.discrete_treatment = False
    est.fit(y, T, X=X, W=W)
    est.effect(X)
def test_dml_sum_vs_original_rf(self):
    """Testing that the summarized version of DML gives the same results as the non-summarized
    when RandomForest is used for first stage models.
    """
    np.random.seed(123)

    def first_stage_model():
        # bootstrap=False keeps the forest deterministic given random_state
        return RandomForestRegressor(n_estimators=10, bootstrap=False, random_state=123)
    n = 1000
    for d in [1, 5]:
        for p in [1, 5]:
            for cov_type in ['nonrobust', 'HC0', 'HC1']:
                for alpha in [.01, .05, .2]:
                    X = np.random.binomial(1, .8, size=(n, d))
                    T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

                    def true_effect(x):
                        return np.hstack([x[:, [0]] + t for t in range(p)])
                    y = true_effect(X) * T.reshape(-1, 1) + X[:, [0] * p] + \
                        (1 * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))
                    if p == 1:
                        y = y.flatten()
                    X_test = np.random.binomial(1, .5, size=(100, d))
                    XT = np.hstack([X, T.reshape(-1, 1)])
                    # summarize the data into per-cell means/variances/counts for the two halves
                    (X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,
                     n_sum_first, n_sum_sec, var_first, var_sec) = _summarize(XT, y)
                    X = np.vstack([X1, X2])
                    y = np.concatenate((y1, y2))
                    X_final = np.vstack([X_final_first, X_final_sec])
                    y_sum = np.concatenate((y_sum_first, y_sum_sec))
                    n_sum = np.concatenate((n_sum_first, n_sum_sec))
                    var_sum = np.concatenate((var_first, var_sec))
                    first_half_sum = len(y_sum_first)
                    first_half = len(y1)

                    # deterministic 2-fold splitter over the summarized data
                    class SplitterSum:
                        def __init__(self):
                            return

                        def split(self, X, T):
                            return [(np.arange(0, first_half_sum),
                                     np.arange(first_half_sum, X.shape[0])),
                                    (np.arange(first_half_sum, X.shape[0]),
                                     np.arange(0, first_half_sum))]
                    est = LinearDML(
                        model_y=first_stage_model(),
                        model_t=first_stage_model(),
                        n_splits=SplitterSum(),
                        linear_first_stages=False,
                        discrete_treatment=False).fit(y_sum,
                                                      X_final[:, -1],
                                                      X_final[:, :-1],
                                                      None,
                                                      sample_weight=n_sum,
                                                      sample_var=var_sum,
                                                      inference=StatsModelsInference(cov_type=cov_type))

                    # matching deterministic splitter over the raw (non-summarized) data
                    class Splitter:
                        def __init__(self):
                            return

                        def split(self, X, T):
                            return [(np.arange(0, first_half),
                                     np.arange(first_half, X.shape[0])),
                                    (np.arange(first_half, X.shape[0]),
                                     np.arange(0, first_half))]
                    lr = LinearDML(
                        model_y=first_stage_model(),
                        model_t=first_stage_model(),
                        n_splits=Splitter(),
                        linear_first_stages=False,
                        discrete_treatment=False).fit(y, X[:, -1], X[:, :-1], None,
                                                      inference=StatsModelsInference(cov_type=cov_type))
                    _compare_dml_classes(est, lr, X_test, alpha=alpha)
def test_summary(self):
    """Tests the inference results summary for continuous treatment estimators."""
    # Test inference results when `cate_feature_names` does not exist
    for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
        # featurized estimator without explicit names: derived polynomial names are used
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        default_names = get_input_columns(TestInference.X)
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names(default_names)
        np.testing.assert_array_equal(coef_rows, fnames)
        intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
        np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])
        # featurized estimator with user-supplied names: names are featurized too
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(feature_names=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names(input_features=fnames)
        np.testing.assert_array_equal(coef_rows, fnames)
        # no featurizer, no explicit names: default X0, X1, ... names are used
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows,
                                      ['X' + str(i) for i in range(TestInference.d_x)])
        # no featurizer, explicit names: names are used verbatim
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(feature_names=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)
        # estimator wrapper without cate_feature_names: default names are used
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        summary_results = wrapped_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows,
                                      ['X' + str(i) for i in range(TestInference.d_x)])
        # estimator wrapper with explicit names: names are used verbatim
        cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                             featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = wrapped_est.summary(feature_names=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)
def test_dominicks():
    """End-to-end DML run on the Dominick's orange-juice data.

    Downloads the dataset if needed, builds price-elasticity featurizers, and fits
    LinearDML per store, printing summary statistics of the estimated effects.

    Fix: `DataFrame.as_matrix` was removed from pandas (1.0); use `.values` instead.
    Also renamed the local `all` which shadowed the builtin.
    """
    file_name = "oj_large.csv"
    if not os.path.isfile(file_name):
        print("Downloading file (this might take a few seconds)...")
        urllib.request.urlretrieve(
            "https://msalicedatapublic.blob.core.windows.net/datasets/OrangeJuice/oj_large.csv",
            file_name)
    oj_data = pd.read_csv(file_name)
    brands = sorted(set(oj_data["brand"]))
    stores = sorted(set(oj_data["store"]))
    featnames = ["week", "feat"] + list(oj_data.columns[6:])
    # Preprocess data
    import datetime
    import numpy as np
    # Convert 'week' to a date
    # week_zero = datetime.datetime.strptime("09/07/89", "%m/%d/%y")
    # oj_data["week"] = pd.to_timedelta(oj_data["week"], unit='w') + week_zero
    # Take log of price
    oj_data["logprice"] = np.log(oj_data["price"])
    oj_data.drop("price", axis=1, inplace=True)
    # Make brand numeric
    oj_data["brand"] = [brands.index(b) for b in oj_data["brand"]]

    class PriceFeaturizer(TransformerMixin):
        """Builds constant per-sample feature tensors encoding own/cross price effects."""

        def __init__(self, n_prods, own_price=True, cross_price_groups=False,
                     cross_price_indiv=True, per_product_effects=True):
            base_arrays = []
            effect_names = []
            one_hots = [(0, ) * p + (1, ) + (0, ) * (n_prods - p - 1) for p in range(n_prods)]
            if own_price:
                base_arrays.append(np.eye(n_prods))
                effect_names.append("own price")
            if cross_price_groups:
                base_arrays.append((np.ones((n_prods, n_prods)) - np.eye(n_prods)) /
                                   (n_prods - 1))
                effect_names.append("group cross price")
            if cross_price_indiv:
                for p in range(n_prods):
                    base_arrays.append(one_hots[p] * np.ones((n_prods, 1)) -
                                       np.diag(one_hots[p]))
                    effect_names.append("cross price effect {} ->".format(p))
            if per_product_effects:
                all_feats = [(np.diag(one_hots[p]) @ arr, nm + " {}".format(p))
                             for arr, nm in zip(base_arrays, effect_names)
                             for p in range(n_prods)]
                # remove meaningless features (e.g. cross-price effects of products on
                # themselves), which have all zero coeffs
                nonempty = [(arr, nm) for arr, nm in all_feats if np.count_nonzero(arr) > 0]
                self._features = [arr for arr, _ in nonempty]
                self._names = [nm for _, nm in nonempty]
            else:
                self._features = base_arrays
                self._names = effect_names

        def fit(self, X):
            self._is_fitted = True
            assert shape(X)[1] == 0
            return self

        def transform(self, X):
            assert self._is_fitted
            assert shape(X)[1] == 0
            # same constant feature tensor for every sample
            return np.tile(self._features, (shape(X)[0], 1, 1, 1))

        @property
        def names(self):
            return self._names

    for name, op, xp_g, xp_i, pp in [("Homogeneous treatment effect", True, False, False, False),
                                     ("Heterogeneous treatment effects", True, False, False, True),
                                     (("Heterogeneous treatment effects"
                                       " with group effects"), True, True, False, True),
                                     (("Heterogeneous treatment effects"
                                       " with cross price effects"), True, False, True, True)]:
        print(name)
        np.random.seed(42)
        ft = PriceFeaturizer(n_prods=3, own_price=op, cross_price_groups=xp_g,
                             cross_price_indiv=xp_i, per_product_effects=pp)
        names = ft.names
        dml = LinearDML(model_y=RandomForestRegressor(),
                        model_t=RandomForestRegressor(),
                        featurizer=ft,
                        n_splits=2)
        effects = []
        for store in stores:
            data = oj_data[oj_data['store'] == store].sort_values(by=['week', 'brand'])
            # .values replaces the removed DataFrame.as_matrix
            dml.fit(T=reshape(data[["logprice"]].values, (-1, 3)),
                    Y=reshape(data[["logmove"]].values, (-1, 3)),
                    W=reshape(data[featnames].values, (-1, 3 * len(featnames))))
            effects.append(dml.coef_)
        effects = np.array(effects)
        for nm, eff in zip(names, effects.T):
            print(" Effect: {}".format(nm))
            print(" Mean: {}".format(np.mean(eff)))
            print(" Std.: {}".format(np.std(eff)))

    class ConstFt(TransformerMixin):
        """Trivial featurizer mapping every sample to the constant feature 1."""

        def fit(self, X):
            return self

        def transform(self, X):
            return np.ones((shape(X)[0], 1))

    print("Vanilla HTE+XP")
    np.random.seed(42)
    dml = LinearDML(model_y=RandomForestRegressor(),
                    model_t=RandomForestRegressor(),
                    featurizer=ConstFt(),
                    n_splits=2)
    effects = []
    for store in stores:
        data = oj_data[oj_data['store'] == store].sort_values(by=['week', 'brand'])
        dml.fit(T=reshape(data[["logprice"]].values, (-1, 3)),
                Y=reshape(data[["logmove"]].values, (-1, 3)),
                W=reshape(data[featnames].values, (-1, 3 * len(featnames))))
        effects.append(dml.coef_)
    effects = np.array(effects)
    names = ["{} on {}".format(i, j) for j in range(3) for i in range(3)]
    for nm, eff in zip(names, reshape(effects, (-1, 9)).T):
        print(" Effect: {}".format(nm))
        print(" Mean: {}".format(np.mean(eff)))
        print(" Std.: {}".format(np.std(eff)))
def test_random_cate_settings(self):
    """Verify that we can call methods on the CATE interpreter with various combinations of inputs"""
    n = 100
    for _ in range(100):
        # randomize the shapes and treatment type for each trial
        t_shape = (n, ) if self.coinflip() else (n, 1)
        y_shape = (n, ) if self.coinflip() else (n, 1)
        discrete_t = self.coinflip()
        X = np.random.normal(size=(n, 4))
        X2 = np.random.normal(size=(10, 4))
        T = np.random.binomial(2, 0.5, size=t_shape) if discrete_t else np.random.normal(size=t_shape)
        Y = ((T.flatten() == 1) * (2 * (X[:, 0] > 0) - 1) +
             (T.flatten() == 2) * (2 * (X[:, 1] > 0) - 1)).reshape(y_shape)
        if self.coinflip():
            # occasionally use a two-column outcome
            y_shape = (n, 2)
            Y = np.tile(Y.reshape((-1, 1)), (1, 2))
        est = LinearDML(model_y=LinearRegression(),
                        model_t=LogisticRegression() if discrete_t else LinearRegression(),
                        discrete_treatment=discrete_t)
        # randomly assembled keyword arguments for each stage of the workflow
        fit_kwargs = {}
        cate_init_kwargs = {}
        policy_init_kwargs = {}
        intrp_kwargs = {}
        policy_intrp_kwargs = {}
        common_kwargs = {}
        plot_kwargs = {}
        render_kwargs = {}
        export_kwargs = {}
        if self.coinflip():
            cate_init_kwargs.update(include_model_uncertainty=True)
            policy_init_kwargs.update(risk_level=0.1)
        else:
            fit_kwargs.update(inference=None)
        if self.coinflip():
            cate_init_kwargs.update(uncertainty_level=0.01)
        if self.coinflip():
            policy_init_kwargs.update(risk_seeking=True)
        if self.coinflip(1 / 3):
            policy_intrp_kwargs.update(sample_treatment_costs=0.1)
        elif self.coinflip():
            if discrete_t:
                policy_intrp_kwargs.update(sample_treatment_costs=np.random.normal(size=(10, 2)))
            else:
                if self.coinflip():
                    policy_intrp_kwargs.update(sample_treatment_costs=np.random.normal(size=(10, 1)))
                else:
                    policy_intrp_kwargs.update(sample_treatment_costs=np.random.normal(size=(10, )))
        if self.coinflip():
            common_kwargs.update(feature_names=['A', 'B', 'C', 'D'])
        if self.coinflip():
            common_kwargs.update(filled=False)
        if self.coinflip():
            common_kwargs.update(rounded=False)
        if self.coinflip():
            common_kwargs.update(precision=1)
        if self.coinflip():
            render_kwargs.update(rotate=True)
            export_kwargs.update(rotate=True)
        if self.coinflip():
            render_kwargs.update(leaves_parallel=False)
            export_kwargs.update(leaves_parallel=False)
        # treatment names: 3 categories when discrete, otherwise control/treated
        if discrete_t:
            render_kwargs.update(treatment_names=['control gp', 'treated gp', 'more gp'])
            export_kwargs.update(treatment_names=['control gp', 'treated gp', 'more gp'])
        else:
            render_kwargs.update(treatment_names=['control gp', 'treated gp'])
            export_kwargs.update(treatment_names=['control gp', 'treated gp'])
        if self.coinflip():
            render_kwargs.update(format='png')
        if self.coinflip():
            export_kwargs.update(out_file='out')
        if self.coinflip(0.95):  # don't launch files most of the time
            render_kwargs.update(view=False)
        with self.subTest(t_shape=t_shape, y_shape=y_shape, discrete_t=discrete_t,
                          fit_kwargs=fit_kwargs,
                          cate_init_kwargs=cate_init_kwargs,
                          policy_init_kwargs=policy_init_kwargs,
                          policy_intrp_kwargs=policy_intrp_kwargs,
                          intrp_kwargs=intrp_kwargs,
                          common_kwargs=common_kwargs,
                          plot_kwargs=plot_kwargs,
                          render_kwargs=render_kwargs,
                          export_kwargs=export_kwargs):
            plot_kwargs.update(common_kwargs)
            render_kwargs.update(common_kwargs)
            export_kwargs.update(common_kwargs)
            policy_intrp_kwargs.update(intrp_kwargs)
            est.fit(Y, T, X=X, **fit_kwargs)
            intrp = SingleTreeCateInterpreter(**cate_init_kwargs)
            intrp.interpret(est, X2, **intrp_kwargs)
            intrp.plot(**plot_kwargs)
            intrp.render('outfile', **render_kwargs)
            intrp.export_graphviz(**export_kwargs)
            intrp = SingleTreePolicyInterpreter(**policy_init_kwargs)
            try:
                intrp.interpret(est, X2, **policy_intrp_kwargs)
                intrp.plot(**plot_kwargs)
                intrp.render('outfile', **render_kwargs)
                intrp.export_graphviz(**export_kwargs)
            except AttributeError as e:
                # a degenerate policy tree is acceptable; anything else is a real failure
                assert str(e).find("samples should") >= 0
def test_comparison(self):
    """Check that RScorer's rankings agree with oracle root-PEHE across many CATE estimators."""

    def make_reg():
        return LinearRegression()

    def make_clf():
        return LogisticRegression()

    y, T, X, true_eff = self._get_data()
    X_train, X_val, T_train, T_val, Y_train, Y_val, _, true_eff_val = train_test_split(
        X, T, y, true_eff, test_size=.4)

    # A zoo of estimators spanning DML, meta-learners and DR approaches.
    candidates = [
        ('ldml',
         LinearDML(model_y=make_reg(), model_t=make_clf(), discrete_treatment=True,
                   linear_first_stages=False, cv=3)),
        ('sldml',
         SparseLinearDML(model_y=make_reg(), model_t=make_clf(), discrete_treatment=True,
                         featurizer=PolynomialFeatures(degree=2, include_bias=False),
                         linear_first_stages=False, cv=3)),
        ('xlearner',
         XLearner(models=make_reg(), cate_models=make_reg(), propensity_model=make_clf())),
        ('dalearner',
         DomainAdaptationLearner(models=make_reg(), final_models=make_reg(),
                                 propensity_model=make_clf())),
        ('slearner', SLearner(overall_model=make_reg())),
        ('tlearner', TLearner(models=make_reg())),
        ('drlearner',
         DRLearner(model_propensity=make_clf(), model_regression=make_reg(),
                   model_final=make_reg(), cv=3)),
        ('rlearner',
         NonParamDML(model_y=make_reg(), model_t=make_clf(), model_final=make_reg(),
                     discrete_treatment=True, cv=3)),
        ('dml3dlasso',
         DML(model_y=make_reg(), model_t=make_clf(), model_final=make_reg(),
             discrete_treatment=True, featurizer=PolynomialFeatures(degree=3),
             linear_first_stages=False, cv=3)),
    ]

    # Fit every candidate on the training split in parallel.
    fitted = Parallel(n_jobs=-1, verbose=1)(
        delayed(_fit_model)(name, mdl, Y_train, T_train, X_train)
        for name, mdl in fitted_or(candidates)) if False else Parallel(n_jobs=-1, verbose=1)(
        delayed(_fit_model)(name, mdl, Y_train, T_train, X_train)
        for name, mdl in candidates)

    scorer = RScorer(model_y=make_reg(), model_t=make_clf(), discrete_treatment=True,
                     cv=3, mc_iters=2, mc_agg='median')
    scorer.fit(Y_val, T_val, X=X_val)

    def root_pehe(mdl):
        # Oracle metric: RMSE between the true and estimated effects on validation data.
        return np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten()) ** 2))

    rscore = [scorer.score(mdl) for _, mdl in fitted]
    rootpehe_score = [root_pehe(mdl) for _, mdl in fitted]

    # Higher R-score should go with lower root-PEHE: the fitted slope must stay small.
    assert LinearRegression().fit(np.array(rscore).reshape(-1, 1),
                                  np.array(rootpehe_score)).coef_ < 0.5
    # The scorer's chosen best model should be close to the oracle-best.
    best, _ = scorer.best_model([mdl for _, mdl in fitted])
    assert root_pehe(best) < 1.2 * np.min(rootpehe_score)
    # And so should the scorer's ensemble.
    ens, _ = scorer.ensemble([mdl for _, mdl in fitted])
    assert root_pehe(ens) < 1.2 * np.min(rootpehe_score)
def test_dml_multi_dim_treatment_outcome(self):
    """Test that the summarized and unsummarized versions of DML give the correct (known) results.

    Data is generated so that the effect of treatment j on outcome i is
    ``x[0] + 10 * i + j``; the fitted linear CATE intercepts and coefficients
    (and their confidence intervals) are then checked against those values,
    both on raw data and on row-summarized (weighted) data.
    """
    from econml.dml import LinearDML
    from econml.inference import StatsModelsInference
    np.random.seed(123)
    n = 100000
    precision = .01       # tolerance for point estimates
    precision_int = .0001  # tolerance for interval endpoints
    with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
        for d in [2, 5]:  # n_feats + n_controls
            for d_x in [1]:  # n_feats
                for p in [1, 5]:  # n_outcomes
                    for q in [1, 5]:  # n_treatments
                        X = np.random.binomial(1, .5, size=(n, d))
                        # Treatments are confounded through X[:, 1].
                        T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1))
                                       for _ in range(q)])

                        def true_effect(x, i):
                            # Effect of treatment i on each of the p outcomes.
                            return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])
                        # FIX: use a list, not a generator — np.sum with the `axis`
                        # keyword does not accept generator inputs.
                        y = np.sum([true_effect(X, i) * T[:, [i]] for i in range(q)],
                                   axis=0) + X[:, [0] * p]
                        if p == 1:
                            y = y.flatten()
                        est = LinearDML(model_y=LinearRegression(),
                                        model_t=LinearRegression(),
                                        linear_first_stages=False)
                        est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        intercept = est.intercept_.reshape((p, q))
                        lower_int, upper_int = est.intercept__interval(alpha=.001)
                        lower_int = lower_int.reshape((p, q))
                        upper_int = upper_int.reshape((p, q))
                        coef = est.coef_.reshape(p, q, d_x)
                        lower, upper = est.coef__interval(alpha=.001)
                        lower = lower.reshape(p, q, d_x)
                        upper = upper.reshape(p, q, d_x)
                        for i in range(p):
                            for j in range(q):
                                # Intercept should recover the constant effect 10*i + j.
                                np.testing.assert_allclose(intercept[i, j], 10 * i + j,
                                                           rtol=0, atol=precision)
                                np.testing.assert_array_less(lower_int[i, j],
                                                             10 * i + j + precision_int)
                                np.testing.assert_array_less(10 * i + j - precision_int,
                                                             upper_int[i, j])
                                # Coefficient on x0 is 1; remaining coefficients are 0.
                                np.testing.assert_allclose(coef[i, j, 0], 1, atol=precision)
                                np.testing.assert_array_less(lower[i, j, 0], 1)
                                np.testing.assert_array_less(1, upper[i, j, 0])
                                np.testing.assert_allclose(coef[i, j, 1:],
                                                           np.zeros(coef[i, j, 1:].shape),
                                                           atol=precision)
                                np.testing.assert_array_less(
                                    lower[i, j, 1:],
                                    np.zeros(lower[i, j, 1:].shape) + precision_int)
                                np.testing.assert_array_less(
                                    np.zeros(lower[i, j, 1:].shape) - precision_int,
                                    upper[i, j, 1:])
                        # Same fit, but the featurizer's bias column plays the intercept role.
                        est = LinearDML(model_y=LinearRegression(),
                                        model_t=LinearRegression(),
                                        linear_first_stages=False,
                                        featurizer=PolynomialFeatures(degree=1),
                                        fit_cate_intercept=False)
                        est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        # With no CATE intercept, intercept accessors must raise.
                        with pytest.raises(AttributeError) as e_info:
                            intercept = est.intercept_
                        with pytest.raises(AttributeError) as e_info:
                            intercept = est.intercept__interval(alpha=0.05)
                        coef = est.coef_.reshape(p, q, d_x + 1)
                        lower, upper = est.coef__interval(alpha=.001)
                        lower = lower.reshape(p, q, d_x + 1)
                        upper = upper.reshape(p, q, d_x + 1)
                        for i in range(p):
                            for j in range(q):
                                # Bias column recovers 10*i + j.
                                np.testing.assert_allclose(coef[i, j, 0], 10 * i + j,
                                                           rtol=0, atol=precision)
                                np.testing.assert_array_less(lower[i, j, 0],
                                                             10 * i + j + precision_int)
                                np.testing.assert_array_less(10 * i + j - precision_int,
                                                             upper[i, j, 0])
                                # x0 column recovers 1; the rest are 0.
                                np.testing.assert_allclose(coef[i, j, 1], 1, atol=precision)
                                np.testing.assert_array_less(lower[i, j, 1], 1)
                                np.testing.assert_array_less(1, upper[i, j, 1])
                                np.testing.assert_allclose(coef[i, j, 2:],
                                                           np.zeros(coef[i, j, 2:].shape),
                                                           atol=precision)
                                np.testing.assert_array_less(
                                    lower[i, j, 2:],
                                    np.zeros(lower[i, j, 2:].shape) + precision_int)
                                np.testing.assert_array_less(
                                    np.zeros(lower[i, j, 2:].shape) - precision_int,
                                    upper[i, j, 2:])
                        # Summarized version: collapse duplicate rows into weighted observations.
                        XT = np.hstack([X, T])
                        (X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,
                         n_sum_first, n_sum_sec, var_first, var_sec) = _summarize(XT, y)
                        X = np.vstack([X1, X2])
                        y = np.concatenate((y1, y2))
                        X_final = np.vstack([X_final_first, X_final_sec])
                        y_sum = np.concatenate((y_sum_first, y_sum_sec))
                        n_sum = np.concatenate((n_sum_first, n_sum_sec))
                        var_sum = np.concatenate((var_first, var_sec))
                        first_half_sum = len(y_sum_first)

                        class SplitterSum:
                            # Deterministic 2-fold splitter: first summarized half vs second.
                            def __init__(self):
                                return

                            def split(self, X, T):
                                return [(np.arange(0, first_half_sum),
                                         np.arange(first_half_sum, X.shape[0])),
                                        (np.arange(first_half_sum, X.shape[0]),
                                         np.arange(0, first_half_sum))]

                        # FIX: use `cv=` (as elsewhere in this file) instead of the
                        # deprecated `n_splits=`, and pass X/W by keyword for
                        # consistency with the other fit calls.
                        est = LinearDML(
                            model_y=LinearRegression(),
                            model_t=LinearRegression(),
                            cv=SplitterSum(),
                            linear_first_stages=False,
                            discrete_treatment=False).fit(
                                y_sum, X_final[:, d:],
                                X=X_final[:, :d_x], W=X_final[:, d_x:d],
                                sample_weight=n_sum, sample_var=var_sum,
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        # The summarized fit should recover the same known values.
                        intercept = est.intercept_.reshape((p, q))
                        lower_int, upper_int = est.intercept__interval(alpha=.001)
                        lower_int = lower_int.reshape((p, q))
                        upper_int = upper_int.reshape((p, q))
                        coef = est.coef_.reshape(p, q, d_x)
                        lower, upper = est.coef__interval(alpha=.001)
                        lower = lower.reshape(p, q, d_x)
                        upper = upper.reshape(p, q, d_x)
                        for i in range(p):
                            for j in range(q):
                                np.testing.assert_allclose(intercept[i, j], 10 * i + j,
                                                           rtol=0, atol=precision)
                                np.testing.assert_array_less(lower_int[i, j],
                                                             10 * i + j + precision_int)
                                np.testing.assert_array_less(10 * i + j - precision_int,
                                                             upper_int[i, j])
                                np.testing.assert_allclose(coef[i, j, 0], 1, atol=precision)
                                np.testing.assert_array_less(lower[i, j, 0], 1)
                                np.testing.assert_array_less(1, upper[i, j, 0])
                                np.testing.assert_allclose(coef[i, j, 1:],
                                                           np.zeros(coef[i, j, 1:].shape),
                                                           atol=precision)
                                np.testing.assert_array_less(
                                    lower[i, j, 1:],
                                    np.zeros(lower[i, j, 1:].shape) + precision_int)
                                np.testing.assert_array_less(
                                    np.zeros(lower[i, j, 1:].shape) - precision_int,
                                    upper[i, j, 1:])
def test_ate_inference(self):
    """Tests the ate inference results."""
    Y, T, X, W = TestATEInference.Y, TestATEInference.T, TestATEInference.X, TestATEInference.W
    # Exercise both bootstrap-based and default ('auto') inference.
    for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
        cate_est = LinearDML(model_t=LinearRegression(),
                             model_y=LinearRegression(),
                             featurizer=PolynomialFeatures(
                                 degree=2, include_bias=False))
        cate_est.fit(Y, T, X=X, W=W, inference=inference)
        # ATE: point estimate, inference object, interval; mean CI has one entry per outcome.
        cate_est.ate(X)
        cate_est.ate_inference(X)
        cate_est.ate_interval(X, alpha=.01)
        lb, _ = cate_est.ate_inference(X).conf_int_mean()
        np.testing.assert_array_equal(lb.shape, Y.shape[1:])
        # Marginal ATE: mean CI is (outcomes x treatments).
        cate_est.marginal_ate(T, X)
        cate_est.marginal_ate_interval(T, X, alpha=.01)
        cate_est.marginal_ate_inference(T, X)
        lb, _ = cate_est.marginal_ate_inference(T, X).conf_int_mean()
        np.testing.assert_array_equal(lb.shape, Y.shape[1:] + T.shape[1:])
        # Constant marginal ATE: same shape contract as the marginal ATE.
        cate_est.const_marginal_ate(X)
        cate_est.const_marginal_ate_interval(X, alpha=.01)
        cate_est.const_marginal_ate_inference(X)
        lb, _ = cate_est.const_marginal_ate_inference(X).conf_int_mean()
        np.testing.assert_array_equal(lb.shape, Y.shape[1:] + T.shape[1:])
        # Summary against a wildly wrong null (value=10): the tested cell
        # (presumably the p-value column — TODO confirm against summary layout)
        # should be essentially zero.
        summary = cate_est.ate_inference(X).summary(value=10)
        for i in range(Y.shape[1]):
            assert summary.tables[0].data[1 + i][4] < 1e-5
        # Summary against the estimate's own mean: that cell should be ~1.
        summary = cate_est.ate_inference(X).summary(
            value=np.mean(cate_est.effect(X), axis=0))
        for i in range(Y.shape[1]):
            np.testing.assert_almost_equal(
                summary.tables[0].data[1 + i][4], 1.0)
        # Same two checks for the marginal ATE summary; the cell index
        # 1 + 3*d_t + j offsets past the point/stderr/z columns for d_t treatments.
        summary = cate_est.marginal_ate_inference(T, X).summary(value=10)
        for i in range(Y.shape[1]):
            for j in range(T.shape[1]):
                assert summary.tables[0].data[2 + i][1 + 3 * T.shape[1] + j] < 1e-5
        summary = cate_est.marginal_ate_inference(T, X).summary(
            value=np.mean(cate_est.marginal_effect(T, X), axis=0))
        for i in range(Y.shape[1]):
            for j in range(T.shape[1]):
                np.testing.assert_almost_equal(
                    summary.tables[0].data[2 + i][1 + 3 * T.shape[1] + j], 1.0)
        # And for the constant marginal ATE summary.
        summary = cate_est.const_marginal_ate_inference(X).summary(
            value=10)
        for i in range(Y.shape[1]):
            for j in range(T.shape[1]):
                assert summary.tables[0].data[2 + i][1 + 3 * T.shape[1] + j] < 1e-5
        summary = cate_est.const_marginal_ate_inference(X).summary(
            value=np.mean(cate_est.const_marginal_effect(X), axis=0))
        for i in range(Y.shape[1]):
            for j in range(T.shape[1]):
                np.testing.assert_almost_equal(
                    summary.tables[0].data[2 + i][1 + 3 * T.shape[1] + j], 1.0)
def test_cat_treatments(self):
    """Check summary naming for categorical treatments, across refits and re-set categories."""
    data = TestPandasIntegration
    X = data.df[data.features]
    Y = data.df[data.outcome]
    T = data.df[data.cat_treat]

    # Fit with an explicitly ordered category list.
    est = LinearDML(discrete_treatment=True,
                    linear_first_stages=False,
                    categories=data.cat_treat_labels)
    est.fit(Y, T, X=X)
    self._check_input_names(est.summary(), T_cat=True)

    # A custom treatment name should propagate into the per-category comparison labels.
    treat_name = "Category"
    expected_comp = [f"{treat_name}_{lbl}" for lbl in data.cat_treat_labels[1:]]
    self._check_input_names(est.summary(treatment_names=[treat_name]),
                            T_cat=True,
                            treat_comp=expected_comp)

    # Refitting should leave the naming intact.
    est.fit(Y, T, X=X)
    self._check_input_names(est.summary(), T_cat=True)

    # Rename the categories on both the estimator and the data, then refit.
    est.categories = [f"{lbl}_1" for lbl in data.cat_treat_labels]
    T = T.apply(lambda t: t + "_1")
    est.fit(Y, T, X=X)
    expected_comp = [f"{data.cat_treat[0]}_{lbl}_1" for lbl in data.cat_treat_labels[1:]]
    self._check_input_names(est.summary(),
                            T_cat=True,
                            treat_comp=expected_comp)
def run_all_mc(first_stage, folder, n_list, n_exp, hetero_coef_list, d_list,
               d_x_list, p_list, t_list, cov_type_list, alpha_list):
    """Run a grid of Monte Carlo coverage experiments for LinearDML.

    For every combination of sample size, heteroskedasticity coefficient,
    dimensions, outcome/treatment counts, covariance type and alpha, fit
    LinearDML on both summarized (weighted) and raw data, accumulate coverage
    of coefficient and effect intervals via ``_append_coverage`` (defined
    elsewhere), and write results/plots under ``results/``.

    Parameters
    ----------
    first_stage : callable returning a fresh first-stage model instance
    folder : str, name used for the results file and plot prefixes
    n_list, n_exp, hetero_coef_list, d_list, d_x_list, p_list, t_list,
    cov_type_list, alpha_list : grid of experiment settings
    """
    if not os.path.exists("results"):
        os.makedirs('results')
    results_filename = os.path.join("results", "{}.txt".format(folder))
    np.random.seed(123)
    coverage_est = {}  # coverage stats for the summarized-data estimator
    coverage_lr = {}   # coverage stats for the raw-data estimator
    n_tests = 0
    n_failed_coef = 0
    n_failed_effect = 0
    cov_tol = .04  # allowed deviation of empirical coverage from nominal 1 - alpha
    for n in n_list:
        for hetero_coef in hetero_coef_list:
            for d in d_list:
                for d_x in d_x_list:
                    if d_x > d:
                        continue
                    for p in p_list:
                        for d_t in t_list:
                            # Distinct binary feature rows used to evaluate effects.
                            X_test = np.unique(np.random.binomial(1, .5,
                                                                  size=(20, d_x)),
                                               axis=0)
                            t0 = time.time()
                            for it in range(n_exp):
                                X = np.random.binomial(1, .8, size=(n, d))
                                # Treatments confounded through X[:, 0].
                                T = np.hstack([
                                    np.random.binomial(1, .5 * X[:, 0] + .25,
                                                       size=(n, )).reshape(-1, 1)
                                    for _ in range(d_t)
                                ])
                                # NOTE(review): the comprehension variable `it` below
                                # shadows the experiment index `it` above, so true_coef
                                # depends on the treatment index only — confirm intended.
                                true_coef = np.hstack([
                                    np.hstack([it + np.arange(p).reshape(-1, 1),
                                               it + np.ones((p, 1)),
                                               np.zeros((p, d_x - 1))])
                                    for it in range(d_t)
                                ])

                                def true_effect(x, t):
                                    # Effect of treatments t at features x under true_coef.
                                    return cross_product(
                                        np.hstack([np.ones((x.shape[0], 1)),
                                                   x[:, :d_x]]), t) @ true_coef.T
                                y = true_effect(X, T) + X[:, [0] * p] +\
                                    (hetero_coef * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))
                                # Summarize duplicate rows into weighted observations.
                                XT = np.hstack([X, T])
                                X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,\
                                    n_sum_first, n_sum_sec, var_first, var_sec = _summarize(XT, y)
                                X = np.vstack([X1, X2])
                                y = np.concatenate((y1, y2))
                                X_final = np.vstack([X_final_first, X_final_sec])
                                y_sum = np.concatenate((y_sum_first, y_sum_sec))
                                n_sum = np.concatenate((n_sum_first, n_sum_sec))
                                var_sum = np.concatenate((var_first, var_sec))
                                first_half_sum = len(y_sum_first)
                                first_half = len(y1)
                                for cov_type in cov_type_list:
                                    class SplitterSum:
                                        # Deterministic 2-fold split of the summarized data.
                                        def __init__(self):
                                            return

                                        def split(self, X, T):
                                            return [
                                                (np.arange(0, first_half_sum),
                                                 np.arange(first_half_sum, X.shape[0])),
                                                (np.arange(first_half_sum, X.shape[0]),
                                                 np.arange(0, first_half_sum))
                                            ]
                                    est = LinearDML(model_y=first_stage(),
                                                    model_t=first_stage(),
                                                    cv=SplitterSum(),
                                                    linear_first_stages=False,
                                                    discrete_treatment=False)
                                    # Summarized fit: weighted by counts, with per-cell variances.
                                    est.fit(y_sum,
                                            X_final[:, -d_t:],
                                            X_final[:, :d_x],
                                            X_final[:, d_x:-d_t],
                                            sample_weight=n_sum,
                                            sample_var=var_sum,
                                            inference=StatsModelsInference(
                                                cov_type=cov_type))

                                    class Splitter:
                                        # Deterministic 2-fold split of the raw data.
                                        def __init__(self):
                                            return

                                        def split(self, X, T):
                                            return [(np.arange(0, first_half),
                                                     np.arange(first_half, X.shape[0])),
                                                    (np.arange(first_half, X.shape[0]),
                                                     np.arange(0, first_half))]
                                    lr = LinearDML(model_y=first_stage(),
                                                   model_t=first_stage(),
                                                   cv=Splitter(),
                                                   linear_first_stages=False,
                                                   discrete_treatment=False)
                                    # Raw (unsummarized) fit used as the baseline comparison.
                                    lr.fit(y, X[:, -d_t:],
                                           X=X[:, :d_x],
                                           W=X[:, d_x:-d_t],
                                           inference=StatsModelsInference(
                                               cov_type=cov_type))
                                    for alpha in alpha_list:
                                        key = (
                                            "n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
                                            "{}_p_{}_d_t_{}_cov_type_{}_alpha_{}"
                                        ).format(n, n_exp, hetero_coef, d,
                                                 d_x, p, d_t, cov_type, alpha)
                                        _append_coverage(key, coverage_est, est,
                                                         X_test, alpha, true_coef,
                                                         true_effect)
                                        _append_coverage(key, coverage_lr, lr,
                                                         X_test, alpha, true_coef,
                                                         true_effect)
                                        # After the last experiment, summarize coverage for this key.
                                        if it == n_exp - 1:
                                            n_tests += 1
                                            mean_coef_cov = np.mean(
                                                coverage_est[key]['coef_cov'])
                                            mean_eff_cov = np.mean(
                                                coverage_est[key]['effect_cov'])
                                            mean_coef_cov_lr = np.mean(
                                                coverage_lr[key]['coef_cov'])
                                            mean_eff_cov_lr = np.mean(
                                                coverage_lr[key]['effect_cov'])
                                            # NOTE(review): these print-comprehensions open the
                                            # results file without ever closing it; relies on GC
                                            # to flush/close the handles — consider a helper that
                                            # uses `with open(...)`.
                                            [
                                                print(
                                                    "{}. Time: {:.2f}, Mean Coef Cov: ({:.4f}, {:.4f}), "
                                                    "Mean Effect Cov: ({:.4f}, {:.4f})"
                                                    .format(key,
                                                            time.time() - t0,
                                                            mean_coef_cov,
                                                            mean_coef_cov_lr,
                                                            mean_eff_cov,
                                                            mean_eff_cov_lr),
                                                    file=f)
                                                for f in [None,
                                                          open(results_filename, "a")]
                                            ]
                                            # Flag deviations from nominal coverage 1 - alpha.
                                            coef_cov_dev = mean_coef_cov - (1 - alpha)
                                            if np.abs(coef_cov_dev) >= cov_tol:
                                                n_failed_coef += 1
                                                [
                                                    print(
                                                        "BAD coef coverage on "
                                                        "average: deviation = {:.4f}"
                                                        .format(coef_cov_dev),
                                                        file=f)
                                                    for f in [None,
                                                              open(results_filename, "a")]
                                                ]
                                            eff_cov_dev = mean_eff_cov - (1 - alpha)
                                            if np.abs(eff_cov_dev) >= cov_tol:
                                                n_failed_effect += 1
                                                [
                                                    print(
                                                        "BAD effect coverage on "
                                                        "average: deviation = {:.4f}"
                                                        .format(eff_cov_dev),
                                                        file=f)
                                                    for f in [None,
                                                              open(results_filename, "a")]
                                                ]
    # Final report to stdout and the results file.
    [
        print(
            "Finished {} Monte Carlo Tests. Failed Coef Coverage Tests: {}/{}."
            "Failed Effect Coverage Tests: {}/{}. (Coverage Tolerance={})".
            format(n_tests, n_failed_coef, n_tests, n_failed_effect, n_tests,
                   cov_tol),
            file=f) for f in [None, open(results_filename, "a")]
    ]
    agg_coverage_est, std_coverage_est, q_coverage_est = _agg_coverage(
        coverage_est)
    agg_coverage_lr, std_coverage_lr, q_coverage_lr = _agg_coverage(
        coverage_lr)
    [
        print("\nResults for: {}\n--------------------------\n".format(folder),
              file=f) for f in [None, open(results_filename, "a")]
    ]
    # Coverage plots for both estimators, on coefficients and effects.
    plot_coverage(agg_coverage_est, 'coef_cov', n, n_exp, hetero_coef_list,
                  d_list, d_x_list, p_list, t_list, cov_type_list, alpha_list,
                  prefix="sum_", folder=folder)
    plot_coverage(agg_coverage_lr, 'coef_cov', n, n_exp, hetero_coef_list,
                  d_list, d_x_list, p_list, t_list, cov_type_list, alpha_list,
                  prefix="orig_", folder=folder)
    plot_coverage(agg_coverage_est, 'effect_cov', n, n_exp, hetero_coef_list,
                  d_list, d_x_list, p_list, t_list, cov_type_list, alpha_list,
                  prefix="sum_", folder=folder)
    plot_coverage(agg_coverage_lr, 'effect_cov', n, n_exp, hetero_coef_list,
                  d_list, d_x_list, p_list, t_list, cov_type_list, alpha_list,
                  prefix="orig_", folder=folder)
    [
        print("Summarized Data\n----------------", file=f)
        for f in [None, open(results_filename, "a")]
    ]
    print_aggregate(agg_coverage_est, std_coverage_est, q_coverage_est)
    print_aggregate(agg_coverage_est, std_coverage_est, q_coverage_est,
                    lambda: open(results_filename, "a"))
    [
        print("\nUn-Summarized Data\n-----------------", file=f)
        for f in [None, open(results_filename, "a")]
    ]
    print_aggregate(agg_coverage_lr, std_coverage_lr, q_coverage_lr)
    print_aggregate(agg_coverage_lr, std_coverage_lr, q_coverage_lr,
                    lambda: open(results_filename, "a"))
def test_dml(self):
    """Test setting attributes and refitting"""
    y, T, X, W = self._get_data()
    dml = DML(model_y=LinearRegression(),
              model_t=LinearRegression(),
              model_final=StatsModelsLinearRegression(fit_intercept=False),
              linear_first_stages=False,
              random_state=123)
    dml.fit(y, T, X=X, W=W)
    # refit_final requires cached first-stage values; plain fit doesn't cache.
    with pytest.raises(Exception):
        dml.refit_final()
    dml.fit(y, T, X=X, W=W, cache_values=True)
    # Swap in a new final model and refit only the final stage.
    dml.model_final = StatsModelsRLM(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.model_cate, StatsModelsRLM)
    # coef_[0] is the CATE intercept, so the remaining entries match dml.coef_.
    np.testing.assert_array_equal(dml.model_cate.coef_[1:].flatten(),
                                  dml.coef_.flatten())
    lb, ub = dml.model_cate.coef__interval(alpha=0.01)
    lbt, ubt = dml.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb[1:].flatten(), lbt.flatten())
    np.testing.assert_array_equal(ub[1:].flatten(), ubt.flatten())
    intcpt = dml.intercept_
    # Changing the attribute alone doesn't take effect until refit_final.
    dml.fit_cate_intercept = False
    np.testing.assert_equal(dml.intercept_, intcpt)
    dml.refit_final()
    # With no intercept, model coefficients align with dml.coef_ exactly.
    np.testing.assert_array_equal(dml.model_cate.coef_.flatten(),
                                  dml.coef_.flatten())
    lb, ub = dml.model_cate.coef__interval(alpha=0.01)
    lbt, ubt = dml.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb.flatten(), lbt.flatten())
    np.testing.assert_array_equal(ub.flatten(), ubt.flatten())
    # Intercept accessors must now raise.
    with pytest.raises(AttributeError):
        dml.intercept_
    with pytest.raises(AttributeError):
        dml.intercept__interval()
    dml.model_final = DebiasedLasso(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.model_cate, DebiasedLasso)
    # Setting a featurizer takes effect on refit.
    dml.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    dml.model_final = StatsModelsLinearRegression(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.featurizer_, PolynomialFeatures)
    dml.fit_cate_intercept = True
    dml.refit_final()
    # With an intercept plus featurizer, the fitted featurizer becomes a Pipeline.
    assert isinstance(dml.featurizer_, Pipeline)
    np.testing.assert_array_equal(dml.coef_.shape, (X.shape[1]**2))
    np.testing.assert_array_equal(dml.coef__interval()[0].shape,
                                  (X.shape[1]**2))
    coefpre = dml.coef_
    coefpreint = dml.coef__interval()
    # Refitting from scratch with the same random_state reproduces the results.
    dml.fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(coefpre, dml.coef_)
    np.testing.assert_array_equal(coefpreint[0], dml.coef__interval()[0])
    # Mutating the estimator should be equivalent to constructing it that way.
    dml.discrete_treatment = True
    dml.featurizer = None
    dml.linear_first_stages = True
    dml.model_t = LogisticRegression()
    dml.fit(y, T, X=X, W=W)
    newdml = DML(model_y=LinearRegression(),
                 model_t=LogisticRegression(),
                 model_final=StatsModelsLinearRegression(fit_intercept=False),
                 discrete_treatment=True,
                 linear_first_stages=True,
                 random_state=123).fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(dml.coef_, newdml.coef_)
    np.testing.assert_array_equal(dml.coef__interval()[0],
                                  newdml.coef__interval()[0])
    ldml = LinearDML(model_y=LinearRegression(),
                     model_t=LinearRegression(),
                     linear_first_stages=False)
    ldml.fit(y, T, X=X, W=W, cache_values=True)
    # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
    with pytest.raises(ValueError):
        ldml.model_final = StatsModelsRLM()
    ldml = SparseLinearDML(model_y=LinearRegression(),
                           model_t=LinearRegression(),
                           linear_first_stages=False)
    ldml.fit(y, T, X=X, W=W, cache_values=True)
    # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
    with pytest.raises(ValueError):
        ldml.model_final = StatsModelsRLM()
    # Hyperparameters of the hardcoded final model can still be tweaked and refit.
    ldml.alpha = 0.01
    ldml.max_iter = 10
    ldml.tol = 0.01
    ldml.refit_final()
    np.testing.assert_equal(ldml.model_cate.estimators_[0].alpha, 0.01)
    np.testing.assert_equal(ldml.model_cate.estimators_[0].max_iter, 10)
    np.testing.assert_equal(ldml.model_cate.estimators_[0].tol, 0.01)