def test_multiple_treatments(self):
    """Fit the continuous-treatment forest on two simultaneous treatments.

    Multiple treatments are only applicable to the continuous estimator.
    The first treatment is given the `_exp_te` effect and the second the
    `_const_te` effect; the fitted estimator is then passed to `_test_te`
    and `_test_ci` together with the stacked expected effects.
    """
    np.random.seed(123)
    fixture = TestOrthoForest
    # Columns of W that enter both the treatment and the outcome equations
    W_support = fixture.W[:, fixture.support]

    # One row per sample, one column per treatment
    TE = np.array([
        [fixture._exp_te(x), fixture._const_te(x)]
        for x in fixture.X
    ])
    coefs_T = uniform(0, 1, size=(fixture.support_size, 2))
    treatment_noise = uniform(-1, 1, size=(fixture.n, 2))
    T = np.matmul(W_support, coefs_T) + treatment_noise

    # Outcome contribution of the treatments: row-wise dot product of TE and T
    delta_Y = np.array([
        np.dot(TE[row], T[row]) for row in range(fixture.n)
    ])
    Y = delta_Y + np.dot(W_support, fixture.coefs_Y) + \
        fixture.epsilon_sample(fixture.n)

    # Multiple treatments with controls
    forest_kwargs = dict(
        n_trees=50,
        min_leaf_size=10,
        max_depth=50,
        subsample_ratio=0.30,
        bootstrap=False,
        n_jobs=4,
        model_T=MultiOutputRegressor(Lasso(alpha=0.024)),
        model_Y=Lasso(alpha=0.024),
        model_T_final=WeightedLassoCVWrapper(),
        model_Y_final=WeightedLassoCVWrapper(),
    )
    est = ContinuousTreatmentOrthoForest(**forest_kwargs)
    est.fit(Y, T, fixture.X, fixture.W, inference="blb")

    # Stack the two expected effect curves as columns
    expected_te = np.array([
        fixture.expected_exp_te,
        fixture.expected_const_te,
    ]).T
    self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
    self._test_ci(est, expected_te, tol=2.0, treatment_type='multi')
def fit(self, x, t, y, nfolds=5, seed=282):
    """Fit an orthogonal forest and store it on ``self.reg``.

    ``self.binary`` selects the estimator: ``DiscreteTreatmentOrthoForest``
    when truthy, ``ContinuousTreatmentOrthoForest`` otherwise. Both are
    built with the same hyperparameters and fitted as ``fit(y, t, x)``.

    Parameters
    ----------
    x : features handed to the forest as its third positional argument.
    t : treatments handed to the forest as its second positional argument.
    y : outcomes handed to the forest as its first positional argument.
    nfolds : unused here; kept so the signature stays unchanged.
    seed : forwarded to the forest as ``random_state``. The default (282)
        is the value that was previously hard-coded.
    """
    # The two estimators take identical arguments, so only the class differs.
    if self.binary:
        forest_cls = DiscreteTreatmentOrthoForest
    else:
        forest_cls = ContinuousTreatmentOrthoForest
    self.reg = forest_cls(n_trees=1,
                          max_depth=2,
                          n_jobs=100,
                          subsample_ratio=0.25,
                          random_state=seed)
    self.reg.fit(y, t, x)
def test_nuisance_model_has_weights(self):
    """Test whether the correct exception is being raised if model_final doesn't have weights.

    The estimator is built with plain ``Lasso`` nuisance models and no
    ``model_T_final`` / ``model_Y_final``. Calling ``effect`` is then
    expected to raise a ``TypeError`` naming ``Lasso``.
    """
    # NOTE(review): this relies on plain Lasso rejecting sample weights;
    # confirm that still holds for the scikit-learn version in use.
    # Generate data with continuous treatments
    T = np.dot(TestOrthoForest.W[:, TestOrthoForest.support], TestOrthoForest.coefs_T) + \
        TestOrthoForest.eta_sample(TestOrthoForest.n)
    TE = np.array([self._exp_te(x) for x in TestOrthoForest.X])
    Y = np.dot(TestOrthoForest.W[:, TestOrthoForest.support], TestOrthoForest.coefs_Y) + \
        T * TE + TestOrthoForest.epsilon_sample(TestOrthoForest.n)
    # Instantiate model with most of the default parameters
    est = ContinuousTreatmentOrthoForest(n_jobs=4, n_trees=10,
                                         model_T=Lasso(),
                                         model_Y=Lasso())
    est.fit(Y=Y, T=T, X=TestOrthoForest.X, W=TestOrthoForest.W)
    weights_error_msg = (
        "Estimators of type {} do not accept weights. "
        "Consider using the class WeightedModelWrapper from econml.utilities to build a weighted model."
    )
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists on Python 3.12+.
    self.assertRaisesRegex(TypeError, weights_error_msg.format("Lasso"),
                           est.effect, X=TestOrthoForest.X)
class OrthoForest(Model):
    """Wrap the orthogonal-forest estimators behind the ``Model`` interface.

    ``self.reg`` holds the fitted estimator and is ``None`` until ``fit``
    has been called. ``self.binary`` is read in ``fit`` but is not set in
    this class (presumably by ``Model.__init__`` -- confirm).
    """

    def __init__(self, *args, **kwargs):
        """Start with no fitted estimator, then defer to ``Model.__init__``."""
        self.reg = None
        super(OrthoForest, self).__init__(*args, **kwargs)

    def fit(self, x, t, y, nfolds=5, seed=282):
        """Fit an orthogonal forest and store it on ``self.reg``.

        ``self.binary`` selects ``DiscreteTreatmentOrthoForest`` when
        truthy and ``ContinuousTreatmentOrthoForest`` otherwise. Both are
        built with the same hyperparameters and fitted as ``fit(y, t, x)``.

        Parameters
        ----------
        x : features handed to the forest as its third positional argument.
        t : treatments handed to the forest as its second positional argument.
        y : outcomes handed to the forest as its first positional argument.
        nfolds : unused here; kept so the signature stays unchanged.
        seed : forwarded to the forest as ``random_state``. The default
            (282) is the value that was previously hard-coded.
        """
        # The two estimators take identical arguments, so only the class differs.
        if self.binary:
            forest_cls = DiscreteTreatmentOrthoForest
        else:
            forest_cls = ContinuousTreatmentOrthoForest
        self.reg = forest_cls(n_trees=1,
                              max_depth=2,
                              n_jobs=100,
                              subsample_ratio=0.25,
                              random_state=seed)
        self.reg.fit(y, t, x)

    def predict(self, x, t):
        """Return the estimated constant marginal effect at ``x`` scaled by ``t``.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called yet (``self.reg`` is ``None``).
        """
        if self.reg is None:
            # RuntimeError is an Exception subclass, so existing
            # `except Exception` handlers keep working.
            raise RuntimeError('OrthoForest not Initialized')
        # Flatten so the product with t is element-wise per sample
        effect = self.reg.const_marginal_effect(x).reshape(-1)
        return effect * t

    def get_predictors(self, x, t):
        """Return ``x`` stacked column-wise with ``(t - 0.5) * x``.

        ``t`` is reshaped to a column so it broadcasts across the columns
        of ``x``.
        """
        return np.hstack([x, (t - 0.5).reshape(-1, 1) * x])
def test_nuisance_model_has_weights(self):
    """Test whether the correct exception is being raised if model_final doesn't have weights.

    The estimator is built with ``NoWeightModel`` nuisance models, whose
    ``fit`` takes no weights, and no ``model_T_final`` / ``model_Y_final``.
    Calling ``effect`` is then expected to raise a ``TypeError`` naming
    ``NoWeightModel``.
    """
    # Create a wrapper around Lasso that doesn't support weights
    # since Lasso does natively support them starting in sklearn 0.23
    class NoWeightModel:
        """Lasso wrapper whose ``fit`` signature has no weight argument."""

        def __init__(self):
            self.model = Lasso()

        def fit(self, X, y):
            self.model.fit(X, y)
            return self

        def predict(self, X):
            return self.model.predict(X)

    # Generate data with continuous treatments
    T = np.dot(TestOrthoForest.W[:, TestOrthoForest.support], TestOrthoForest.coefs_T) + \
        TestOrthoForest.eta_sample(TestOrthoForest.n)
    TE = np.array([self._exp_te(x) for x in TestOrthoForest.X])
    Y = np.dot(TestOrthoForest.W[:, TestOrthoForest.support], TestOrthoForest.coefs_Y) + \
        T * TE + TestOrthoForest.epsilon_sample(TestOrthoForest.n)
    # Instantiate model with most of the default parameters
    est = ContinuousTreatmentOrthoForest(n_jobs=4, n_trees=10,
                                         model_T=NoWeightModel(),
                                         model_Y=NoWeightModel())
    est.fit(Y=Y, T=T, X=TestOrthoForest.X, W=TestOrthoForest.W)
    weights_error_msg = (
        "Estimators of type {} do not accept weights. "
        "Consider using the class WeightedModelWrapper from econml.utilities to build a weighted model."
    )
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists on Python 3.12+.
    self.assertRaisesRegex(TypeError, weights_error_msg.format("NoWeightModel"),
                           est.effect, X=TestOrthoForest.X)
"""
EconML from Microsoft
python econometrics library with ML automation for causal inference
"""
# NOTE: Y, T, X, W and X_test are not defined in this snippet; they must
# already exist in the surrounding context.
from econml.dml import DMLCateEstimator
from econml.ortho_forest import ContinuousTreatmentOrthoForest
from sklearn.linear_model import LassoCV

# --- Double ML ---
est = DMLCateEstimator(model_y=LassoCV(), model_t=LassoCV())
# W -> high-dimensional confounders, X -> features
est.fit(Y, T, X, W)
treatment_effects = est.effect(X_test)

# --- Orthogonal Random Forest ---
# Either rely on the defaults ...
est = ContinuousTreatmentOrthoForest()
# ... or spell out the hyperparameters
est = ContinuousTreatmentOrthoForest(
    n_trees=500,
    min_leaf_size=10,
    max_depth=10,
    subsample_ratio=0.7,
    lambda_reg=0.01,
    model_T=LassoCV(cv=3),
    model_Y=LassoCV(cv=3),
)
est.fit(Y, T, X, W)
treatment_effects = est.effect(X_test)
def test_continuous_treatments(self):
    """Exercise ContinuousTreatmentOrthoForest on a single continuous treatment.

    Covers three things in order: input handling (plain lists, mismatched
    lengths) and output shape; a fit with controls using inference="blb";
    and a refit of the same estimator without controls. The last two are
    checked through `_test_te` and `_test_ci`.
    """
    np.random.seed(123)
    fixture = TestOrthoForest
    n = fixture.n
    # Columns of W that enter the treatment and outcome equations
    W_support = fixture.W[:, fixture.support]

    # Data with a continuous treatment
    T = np.dot(W_support, fixture.coefs_T) + fixture.eta_sample(n)
    TE = np.array([self._exp_te(x) for x in fixture.X])
    Y = np.dot(W_support, fixture.coefs_Y) + T * TE + fixture.epsilon_sample(n)

    # Mostly-default estimator. n_jobs=1 because code coverage does not
    # work well with parallelism.
    est = ContinuousTreatmentOrthoForest(
        n_jobs=1,
        n_trees=10,
        model_T=Lasso(),
        model_Y=Lasso(),
        model_T_final=WeightedLassoCVWrapper(),
        model_Y_final=WeightedLassoCVWrapper(),
    )

    # Input checks: regular lists must be accepted ...
    est.fit(list(Y), list(T), list(fixture.X), list(fixture.W))
    # ... and inputs of different lengths must be rejected
    Y_half = Y[:n // 2]
    T_half = T[:n // 2]
    with self.assertRaises(ValueError):
        est.fit(Y_half, T_half, fixture.X, fixture.W)

    # Output check: one row per test point
    out_te = est.const_marginal_effect(fixture.x_test)
    self.assertEqual(fixture.x_test.shape[0], out_te.shape[0])

    # Continuous treatment with controls
    forest_kwargs = dict(
        n_trees=50,
        min_leaf_size=10,
        max_depth=50,
        subsample_ratio=0.30,
        bootstrap=False,
        n_jobs=4,
        model_T=Lasso(alpha=0.024),
        model_Y=Lasso(alpha=0.024),
        model_T_final=WeightedLassoCVWrapper(),
        model_Y_final=WeightedLassoCVWrapper(),
    )
    est = ContinuousTreatmentOrthoForest(**forest_kwargs)
    est.fit(Y, T, fixture.X, fixture.W, inference="blb")
    self._test_te(est, fixture.expected_exp_te, tol=0.5)
    self._test_ci(est, fixture.expected_exp_te, tol=1.5)

    # Continuous treatment without controls: pure-noise treatment, refit
    T = fixture.eta_sample(n)
    Y = T * TE + fixture.epsilon_sample(n)
    est.fit(Y, T, fixture.X, inference="blb")
    self._test_te(est, fixture.expected_exp_te, tol=0.5)
    self._test_ci(est, fixture.expected_exp_te, tol=1.5)
from econml.sklearn_extensions.linear_model import WeightedLasso import matplotlib.pyplot as plt from econml.ortho_forest import ContinuousTreatmentOrthoForest, DiscreteTreatmentOrthoForest np.random.seed(123) ############################################################################### # 1) Example with contin forest ############################################################################### # simple example T = np.array([0, 1] * 60) W = np.array([0, 1, 1, 0] * 30).reshape(-1, 1) Y = (.2 * W[:, 0] + 1) * T + .5 est = ContinuousTreatmentOrthoForest( n_trees=1, max_depth=1, subsample_ratio=1, model_T=sklearn.linear_model.LinearRegression(), model_Y=sklearn.linear_model.LinearRegression()) est.fit(Y, T, W, W) print(est.effect(W[:2])) # advanced example with many confounders X = np.random.uniform(-1, 1, size=(4000, 1)) W = np.random.normal(size=(4000, 50)) support = np.random.choice(50, 4, replace=False) T = np.dot(W[:, support], np.random.normal(size=4)) + np.random.normal(size=4000) Y = np.exp(2 * X[:, 0]) * T + np.dot(W[:, support], np.random.normal(size=4)) + .5 est = ContinuousTreatmentOrthoForest(n_trees=100,
def test_continuous_treatments(self):
    """Exercise ContinuousTreatmentOrthoForest on a single continuous treatment.

    Covers three things in order: input handling (plain lists, mismatched
    lengths) and output shape; a fit with controls; and a refit of the
    same estimator without controls. The last two are checked through
    `_test_te`.
    """
    fixture = TestOrthoForest
    n = fixture.n
    # Columns of W that enter the treatment and outcome equations
    W_support = fixture.W[:, fixture.support]

    # Data with a continuous treatment
    T = np.dot(W_support, fixture.coefs_T) + fixture.eta_sample(n)
    TE = np.array([self._exp_te(x) for x in fixture.X])
    Y = np.dot(W_support, fixture.coefs_Y) + T * TE + fixture.epsilon_sample(n)

    # Mostly-default estimator
    est = ContinuousTreatmentOrthoForest(
        n_jobs=4,
        n_trees=10,
        model_T=Lasso(),
        model_Y=Lasso(),
        model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
        model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
    )

    # Input checks: regular lists must be accepted ...
    est.fit(list(Y), list(T), list(fixture.X), list(fixture.W))
    # ... and inputs of different lengths must be rejected
    Y_half = Y[:n // 2]
    T_half = T[:n // 2]
    with self.assertRaises(ValueError):
        est.fit(Y_half, T_half, fixture.X, fixture.W)

    # Output check: one row per test point and a single effect column
    out_te = est.const_marginal_effect(fixture.x_test)
    self.assertSequenceEqual((fixture.x_test.shape[0], 1), out_te.shape)

    # Continuous treatment with controls
    forest_kwargs = dict(
        n_trees=50,
        min_leaf_size=10,
        max_splits=50,
        subsample_ratio=0.30,
        bootstrap=False,
        n_jobs=4,
        model_T=Lasso(alpha=0.024),
        model_Y=Lasso(alpha=0.024),
        model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
        model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
    )
    est = ContinuousTreatmentOrthoForest(**forest_kwargs)
    est.fit(Y, T, fixture.X, fixture.W)
    self._test_te(est, fixture.expected_exp_te, tol=0.5)

    # Continuous treatment without controls: pure-noise treatment, refit
    T = fixture.eta_sample(n)
    Y = T * TE + fixture.epsilon_sample(n)
    est.fit(Y, T, fixture.X)
    self._test_te(est, fixture.expected_exp_te, tol=0.5)