def test_multiple_treatments(self):
    """Check point estimates and BLB intervals with two continuous treatments.

    Outcomes are simulated from two treatments whose effects are given by
    ``TestOrthoForest._exp_te`` and ``TestOrthoForest._const_te``.
    ``DMLOrthoForest`` is fitted once per ``global_residualization`` setting
    and ``CausalForest`` once afterwards; each fit is compared against the
    expected effects with ``self._test_te`` (tol=0.5) and ``self._test_ci``
    (tol=2.0).
    """
    np.random.seed(123)
    n = TestOrthoForest.n
    W_support = TestOrthoForest.W[:, TestOrthoForest.support]

    # Only applicable to continuous treatments
    # Generate data for 2 treatments
    TE = np.array([
        [TestOrthoForest._exp_te(x), TestOrthoForest._const_te(x)]
        for x in TestOrthoForest.X
    ])
    # The two uniform draws stay in this order so the seeded data is unchanged.
    coefs_T = uniform(0, 1, size=(TestOrthoForest.support_size, 2))
    T = np.matmul(W_support, coefs_T) + uniform(-1, 1, size=(n, 2))
    # Row-wise inner product of each sample's effects with its treatments.
    delta_Y = np.array([np.dot(te_row, t_row) for te_row, t_row in zip(TE, T)])
    Y = delta_Y + np.dot(W_support, TestOrthoForest.coefs_Y) + \
        TestOrthoForest.epsilon_sample(n)

    # The expected effects do not depend on the estimator, so build them once.
    expected_te = np.array([
        TestOrthoForest.expected_exp_te,
        TestOrthoForest.expected_const_te,
    ]).T

    for global_residualization in [False, True]:
        # Test multiple treatments with controls
        est = DMLOrthoForest(n_trees=100, min_leaf_size=10,
                             max_depth=50, subsample_ratio=0.50, bootstrap=False, n_jobs=1,
                             model_T=MultiOutputRegressor(Lasso(alpha=0.024)),
                             model_Y=Lasso(alpha=0.024),
                             model_T_final=WeightedLassoCVWrapper(cv=5),
                             model_Y_final=WeightedLassoCVWrapper(cv=5),
                             global_residualization=global_residualization,
                             global_res_cv=5)
        est.fit(Y, T, X=TestOrthoForest.X, W=TestOrthoForest.W, inference="blb")
        self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
        self._test_ci(est, expected_te, tol=2.0, treatment_type='multi')

    # Test CausalForest API. It does not use global_residualization, so it is
    # exercised once, after the loop.
    est = CausalForest(n_trees=100, min_leaf_size=10,
                       max_depth=50, subsample_ratio=0.50, n_jobs=-1,
                       model_T=WeightedLassoCVWrapper(cv=5),
                       model_Y=WeightedLassoCVWrapper(cv=5),
                       cv=5)
    est.fit(Y, T, X=TestOrthoForest.X, W=TestOrthoForest.W, inference="blb")
    self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
    self._test_ci(est, expected_te, tol=2.0, treatment_type='multi')
def test_continuous_treatments(self):
    """Exercise DMLOrthoForest and CausalForest on one continuous treatment.

    For each estimator family the test checks that plain Python lists are
    accepted by ``fit``, that inputs of mismatched length raise
    ``ValueError``, that ``const_marginal_effect`` returns one row per test
    point, and that estimates and BLB intervals match
    ``TestOrthoForest.expected_exp_te`` both with and without controls.
    """
    np.random.seed(123)
    fixture = TestOrthoForest
    n_obs = fixture.n
    half = n_obs // 2

    def simulate_with_controls():
        # Sampling calls keep their original order (treatment noise, then
        # outcome noise); the samplers presumably consume the seeded global RNG.
        controls = fixture.W[:, fixture.support]
        treatment = np.dot(controls, fixture.coefs_T) + fixture.eta_sample(n_obs)
        effect = np.array([self._exp_te(x) for x in fixture.X])
        outcome = np.dot(controls, fixture.coefs_Y) + \
            treatment * effect + fixture.epsilon_sample(n_obs)
        return outcome, treatment, effect

    def simulate_without_controls(effect):
        treatment = fixture.eta_sample(n_obs)
        outcome = treatment * effect + fixture.epsilon_sample(n_obs)
        return outcome, treatment

    def check_inputs_and_output_shape(model, outcome, treatment):
        # --> Check that one can pass in regular lists
        model.fit(list(outcome), list(treatment), X=list(fixture.X), W=list(fixture.W))
        # --> Check that it fails correctly if lists of different shape are passed in
        self.assertRaises(ValueError, model.fit, outcome[:half], treatment[:half],
                          fixture.X, fixture.W)
        # Check that outputs have the correct shape
        predicted = model.const_marginal_effect(fixture.x_test)
        self.assertEqual(fixture.x_test.shape[0], predicted.shape[0])

    def check_estimates(model):
        self._test_te(model, fixture.expected_exp_te, tol=0.5)
        self._test_ci(model, fixture.expected_exp_te, tol=1.5)

    for global_residualization in [False, True]:
        # Generate data with continuous treatments
        Y, T, TE = simulate_with_controls()

        # Instantiate model with most of the default parameters. Using n_jobs=1
        # since code coverage does not work well with parallelism.
        est = DMLOrthoForest(n_jobs=1, n_trees=10,
                             model_T=Lasso(),
                             model_Y=Lasso(),
                             model_T_final=WeightedLassoCVWrapper(),
                             model_Y_final=WeightedLassoCVWrapper(),
                             global_residualization=global_residualization)
        # Test inputs for continuous treatments
        check_inputs_and_output_shape(est, Y, T)

        # Test continuous treatments with controls
        est = DMLOrthoForest(n_trees=100, min_leaf_size=10,
                             max_depth=50, subsample_ratio=0.50, bootstrap=False, n_jobs=1,
                             model_T=Lasso(alpha=0.024),
                             model_Y=Lasso(alpha=0.024),
                             model_T_final=WeightedLassoCVWrapper(cv=5),
                             model_Y_final=WeightedLassoCVWrapper(cv=5),
                             global_residualization=global_residualization,
                             global_res_cv=5)
        est.fit(Y, T, X=fixture.X, W=fixture.W, inference="blb")
        check_estimates(est)

        # Test continuous treatments without controls
        Y, T = simulate_without_controls(TE)
        est.fit(Y, T, fixture.X, inference="blb")
        check_estimates(est)

    # Test Causal Forest API (independent of global_residualization, so it
    # runs once after the loop).
    # Generate data with continuous treatments
    Y, T, TE = simulate_with_controls()

    # Instantiate model with most of the default parameters.
    est = CausalForest(n_jobs=-1, n_trees=10,
                       model_T=WeightedLassoCVWrapper(),
                       model_Y=WeightedLassoCVWrapper())
    # Test inputs for continuous treatments
    check_inputs_and_output_shape(est, Y, T)

    # Test continuous treatments with controls
    est = CausalForest(n_jobs=-1, n_trees=100, min_leaf_size=10,
                       max_depth=50, subsample_ratio=0.50,
                       model_T=WeightedLassoCVWrapper(),
                       model_Y=WeightedLassoCVWrapper(),
                       cv=5)
    est.fit(Y, T, X=fixture.X, W=fixture.W, inference="blb")
    check_estimates(est)

    # Test continuous treatments without controls
    Y, T = simulate_without_controls(TE)
    est.fit(Y, T, X=fixture.X, inference="blb")
    check_estimates(est)
def test_effect_shape(self):
    """Check the output shapes of every effect and interval method.

    ``DROrthoForest``, ``DMLOrthoForest`` (for both ``global_residualization``
    settings) and ``CausalForest`` (discrete and continuous treatment) are
    fitted on a small synthetic data set with ``y`` and ``T`` passed either as
    1-D vectors or as single-column matrices. After each fit the shapes
    returned by ``const_marginal_effect``, ``marginal_effect``, ``effect`` and
    their ``*_interval`` / ``*_inference(...).conf_int()`` counterparts are
    compared against the expected shapes for three query rows.
    """
    from sklearn.dummy import DummyClassifier, DummyRegressor

    np.random.seed(123)
    n = 40  # number of raw samples
    d = 4  # number of binary feature columns
    # Generating random segments aka binary features. All d columns are passed
    # to the estimators as X.
    X = np.random.binomial(1, .5, size=(n, d))
    # Generating the treatment assignment, drawn from Binomial(2, 0.5).
    T = np.random.binomial(2, .5, size=(n, ))
    # Generating an outcome with treatment effect heterogeneity. The first binary
    # feature creates heterogeneity. We also have confounding on the first
    # variable. We also have heteroskedastic errors.
    y = (-1 + 2 * X[:, 0]) * T + X[:, 0] + \
        (1 * X[:, 0] + 1) * np.random.normal(0, 1, size=(n, ))
    X_query = X[:3]

    def check_shapes(est, cme_shape, effect_shape):
        # Assert the shapes of all point estimates and lower interval bounds.
        # ``cme_shape`` applies to the (const) marginal effect outputs and
        # ``effect_shape`` to the ``effect`` outputs.
        assert est.const_marginal_effect(X_query).shape == cme_shape, \
            "Const Marginal Effect dimension incorrect"
        assert est.marginal_effect(1, X_query).shape == cme_shape, \
            "Marginal Effect dimension incorrect"
        assert est.effect(X_query).shape == effect_shape, "Effect dimension incorrect"
        assert est.effect(X_query, T0=0, T1=2).shape == effect_shape, \
            "Effect dimension incorrect"
        assert est.effect(X_query, T0=1, T1=2).shape == effect_shape, \
            "Effect dimension incorrect"
        lb, _ = est.effect_interval(X_query, T0=1, T1=2)
        assert lb.shape == effect_shape, "Effect interval dimension incorrect"
        lb, _ = est.effect_inference(X_query, T0=1, T1=2).conf_int()
        assert lb.shape == effect_shape, "Effect interval dimension incorrect"
        lb, _ = est.const_marginal_effect_interval(X_query)
        assert lb.shape == cme_shape, "Const Marginal Effect interval dimension incorrect"
        lb, _ = est.const_marginal_effect_inference(X_query).conf_int()
        assert lb.shape == cme_shape, "Const Marginal Effect interval dimension incorrect"
        lb, _ = est.marginal_effect_interval(1, X_query)
        assert lb.shape == cme_shape, "Marginal Effect interval dimension incorrect"
        lb, _ = est.marginal_effect_inference(1, X_query).conf_int()
        assert lb.shape == cme_shape, "Marginal Effect interval dimension incorrect"

    def check_discrete_treatment(est):
        # 1-D outcome, then single-column outcome.
        est.fit(y, T, X=X)
        check_shapes(est, (3, 2), (3, ))
        est.fit(y.reshape(-1, 1), T, X=X)
        check_shapes(est, (3, 1, 2), (3, 1))

    def check_continuous_treatment(est):
        # Column outcome and column treatment, column outcome and 1-D
        # treatment, then both 1-D.
        est.fit(y.reshape(-1, 1), T.reshape(-1, 1), X=X)
        check_shapes(est, (3, 1, 1), (3, 1))
        est.fit(y.reshape(-1, 1), T, X=X)
        check_shapes(est, (3, 1), (3, 1))
        est.fit(y, T, X=X)
        check_shapes(est, (3, ), (3, ))

    est = DROrthoForest(n_trees=10, model_Y=DummyRegressor(strategy='mean'),
                        propensity_model=DummyClassifier(strategy='prior'),
                        n_jobs=1)
    check_discrete_treatment(est)

    # Test causal forest API
    est = CausalForest(n_trees=10, model_Y=DummyRegressor(strategy='mean'),
                       model_T=DummyClassifier(strategy='prior'),
                       discrete_treatment=True,
                       n_jobs=1)
    check_discrete_treatment(est)

    for global_residualization in [False, True]:
        est = DMLOrthoForest(n_trees=10, model_Y=DummyRegressor(strategy='mean'),
                             model_T=DummyRegressor(strategy='mean'),
                             global_residualization=global_residualization,
                             n_jobs=1)
        check_continuous_treatment(est)

    # Test Causal Forest API (does not use global_residualization, so it is
    # checked once after the loop).
    est = CausalForest(n_trees=10, model_Y=DummyRegressor(strategy='mean'),
                       model_T=DummyRegressor(strategy='mean'),
                       n_jobs=1)
    check_continuous_treatment(est)
def test_binary_treatments(self):
    """Exercise DROrthoForest and CausalForest on a binary treatment.

    For each estimator family the test checks input handling (lists,
    mismatched lengths, ``(n, 1)`` shaped ``Y``/``T``, a two-column ``T``,
    non-numeric treatments), the shape of ``const_marginal_effect``, and the
    estimates and BLB intervals against ``TestOrthoForest.expected_exp_te``
    with and without controls.
    """
    fixture = TestOrthoForest
    n_obs = fixture.n
    half = n_obs // 2

    def draw_treatment(log_odds):
        # One Bernoulli draw per sample, with probability sigmoid(log_odds).
        probabilities = 1 / (1 + np.exp(-log_odds))
        return np.array([np.random.binomial(1, p) for p in probabilities])

    def simulate_with_controls():
        # Sampling calls keep their original order: treatment noise, the
        # Bernoulli draws, then outcome noise.
        controls = fixture.W[:, fixture.support]
        treatment = draw_treatment(
            np.dot(controls, fixture.coefs_T) + fixture.eta_sample(n_obs))
        effect = np.array([self._exp_te(x) for x in fixture.X])
        outcome = np.dot(controls, fixture.coefs_Y) + \
            treatment * effect + fixture.epsilon_sample(n_obs)
        return outcome, treatment, effect

    def simulate_without_controls(effect):
        treatment = draw_treatment(fixture.eta_sample(n_obs))
        outcome = treatment * effect + fixture.epsilon_sample(n_obs)
        return outcome, treatment

    def check_inputs_and_output_shape(model, outcome, treatment):
        # --> Check that one can pass in regular lists
        model.fit(list(outcome), list(treatment), X=list(fixture.X), W=list(fixture.W))
        # --> Check that it fails correctly if lists of different shape are passed in
        self.assertRaises(ValueError, model.fit, outcome[:half], treatment[:half],
                          fixture.X, fixture.W)
        # --> Check that it works when T, Y have shape (n, 1)
        model.fit(outcome.reshape(-1, 1), treatment.reshape(-1, 1),
                  X=fixture.X, W=fixture.W)
        # --> Check that it fails correctly when T has shape (n, 2)
        self.assertRaises(ValueError, model.fit, outcome, np.ones((n_obs, 2)),
                          fixture.X, fixture.W)
        # --> Check that it fails correctly when the treatments are not numeric
        self.assertRaises(ValueError, model.fit, outcome, np.array(["a"] * n_obs),
                          fixture.X, fixture.W)
        # Check that outputs have the correct shape
        predicted = model.const_marginal_effect(fixture.x_test)
        self.assertSequenceEqual((fixture.x_test.shape[0], 1, 1), predicted.shape)

    def check_estimates(model, te_tol):
        self._test_te(model, fixture.expected_exp_te, tol=te_tol,
                      treatment_type='discrete')
        self._test_ci(model, fixture.expected_exp_te, tol=1.5,
                      treatment_type='discrete')

    np.random.seed(123)
    # Generate data with binary treatments
    Y, T, TE = simulate_with_controls()

    # Instantiate model with default params. Using n_jobs=1 since code coverage
    # does not work well with parallelism.
    est = DROrthoForest(n_trees=10, n_jobs=1,
                        propensity_model=LogisticRegression(), model_Y=Lasso(),
                        propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
                        model_Y_final=WeightedLassoCVWrapper())
    # Test inputs for binary treatments
    check_inputs_and_output_shape(est, Y, T)

    # Test binary treatments with controls
    est = DROrthoForest(n_trees=100, min_leaf_size=10,
                        max_depth=30, subsample_ratio=0.30, bootstrap=False, n_jobs=1,
                        propensity_model=LogisticRegression(
                            C=1 / 0.024, penalty='l1', solver='saga'),
                        model_Y=Lasso(alpha=0.024),
                        propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
                        model_Y_final=WeightedLassoCVWrapper())
    est.fit(Y, T, X=fixture.X, W=fixture.W, inference="blb")
    check_estimates(est, te_tol=0.7)

    # Test binary treatments without controls
    Y, T = simulate_without_controls(TE)
    est.fit(Y, T, X=fixture.X, inference="blb")
    check_estimates(est, te_tol=0.5)

    # Test CausalForest API
    np.random.seed(123)
    # Generate data with binary treatments
    Y, T, TE = simulate_with_controls()

    # Instantiate model with default params (this one runs with n_jobs=-1).
    # NOTE(review): unlike the estimator below, this one is built without
    # discrete_treatment=True -- confirm that is intended.
    est = CausalForest(n_trees=10, n_jobs=-1,
                       model_Y=Lasso(),
                       model_T=LogisticRegressionCV(penalty='l1', solver='saga'))
    # Test inputs for binary treatments
    check_inputs_and_output_shape(est, Y, T)

    # Test binary treatments with controls
    est = CausalForest(n_trees=100, min_leaf_size=10,
                       max_depth=30, subsample_ratio=0.30, n_jobs=-1,
                       model_Y=Lasso(),
                       model_T=LogisticRegressionCV(penalty='l1', solver='saga'),
                       discrete_treatment=True,
                       cv=5)
    est.fit(Y, T, X=fixture.X, W=fixture.W, inference="blb")
    check_estimates(est, te_tol=0.7)

    # Test binary treatments without controls
    Y, T = simulate_without_controls(TE)
    est.fit(Y, T, X=fixture.X, inference="blb")
    check_estimates(est, te_tol=0.5)
max_depth = 20 subsample_ratio = 0.04 #%% # Definition of range of variable tested for heterogeneity min_tfsum = 0.0 max_tfsum = 24.0 delta = (max_tfsum - min_tfsum) / 100 X_test = np.arange(min_tfsum, max_tfsum + delta - 0.001, delta).reshape(-1, 1) #%% # Estimation of causal tree est = CausalForest(n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth, subsample_ratio=subsample_ratio, model_T=WeightedLassoCVWrapper(cv=3), model_Y=WeightedLassoCVWrapper(cv=3), random_state=123) est.fit(Y, T, X=X, W=W) treatment_effects = est.effect(X_test) te_lower, te_upper = est.effect_interval(X_test) #%% # Plot results plt.figure(figsize=(15, 5)) plt.plot(X_test.flatten(), treatment_effects) plt.fill_between(X_test.flatten(), te_lower, te_upper, label="90% CI",