예제 #1
0
 def test_continuous_treatments(self):
     """Check DMLOrthoForest on continuous treatments for both residualization modes.

     For each value of ``global_residualization`` the test verifies input
     handling (plain lists, mismatched lengths), the length of the
     ``const_marginal_effect`` output, and the effect estimates and intervals
     obtained with ``inference="blb"``, first with controls W and then without.
     """
     data = TestOrthoForest
     np.random.seed(123)
     for use_global_res in (False, True):
         # Simulate a continuous treatment and an outcome that both depend on
         # the support columns of W.
         T = np.dot(data.W[:, data.support], data.coefs_T) + data.eta_sample(data.n)
         TE = np.array([self._exp_te(x) for x in data.X])
         baseline = np.dot(data.W[:, data.support], data.coefs_Y)
         Y = baseline + T * TE + data.epsilon_sample(data.n)

         # Mostly default parameters. n_jobs=1 because code coverage
         # does not work well with parallelism.
         est = DMLOrthoForest(
             n_jobs=1, n_trees=10,
             model_T=Lasso(), model_Y=Lasso(),
             model_T_final=WeightedLassoCVWrapper(),
             model_Y_final=WeightedLassoCVWrapper(),
             global_residualization=use_global_res)

         # Input handling: plain Python lists must be accepted ...
         est.fit(list(Y), list(T), X=list(data.X), W=list(data.W))
         # ... while inputs whose lengths disagree must raise a ValueError.
         half = data.n // 2
         Y_half, T_half = Y[:half], T[:half]
         with self.assertRaises(ValueError):
             est.fit(Y_half, T_half, X=data.X, W=data.W)

         # One output row per test point.
         out_te = est.const_marginal_effect(data.x_test)
         self.assertEqual(data.x_test.shape[0], out_te.shape[0])

         # Accuracy of estimates and intervals when controls are supplied.
         est = DMLOrthoForest(
             n_trees=100, min_leaf_size=10, max_depth=50,
             subsample_ratio=0.50, bootstrap=False, n_jobs=1,
             model_T=Lasso(alpha=0.024), model_Y=Lasso(alpha=0.024),
             model_T_final=WeightedLassoCVWrapper(cv=5),
             model_Y_final=WeightedLassoCVWrapper(cv=5),
             global_residualization=use_global_res,
             global_res_cv=5)
         est.fit(Y, T, X=data.X, W=data.W, inference="blb")
         self._test_te(est, data.expected_exp_te, tol=0.5)
         self._test_ci(est, data.expected_exp_te, tol=1.5)

         # Same estimator refit on data generated without any controls.
         T = data.eta_sample(data.n)
         Y = T * TE + data.epsilon_sample(data.n)
         est.fit(Y, T, X=data.X, inference="blb")
         self._test_te(est, data.expected_exp_te, tol=0.5)
         self._test_ci(est, data.expected_exp_te, tol=1.5)
예제 #2
0
 def test_multiple_treatments(self):
     """Check ContinuousTreatmentOrthoForest with two continuous treatments and controls.

     The first treatment's effect comes from ``_exp_te`` and the second from
     ``_const_te``; estimates and ``inference="blb"`` intervals are compared
     against the corresponding expected effects.
     """
     data = TestOrthoForest
     np.random.seed(123)
     # Only applicable to continuous treatments.
     # Per-sample effects: column 0 from _exp_te, column 1 from _const_te.
     TE = np.array([[data._exp_te(x), data._const_te(x)] for x in data.X])
     W_support = data.W[:, data.support]
     coefs_T = uniform(0, 1, size=(data.support_size, 2))
     treatment_noise = uniform(-1, 1, size=(data.n, 2))
     T = np.matmul(W_support, coefs_T) + treatment_noise
     # Treatment contribution to the outcome: row-wise dot product of effects and treatments.
     delta_Y = np.array([np.dot(TE[i], T[i]) for i in range(data.n)])
     Y = delta_Y + np.dot(W_support, data.coefs_Y) + data.epsilon_sample(data.n)

     # Fit with controls and request intervals.
     est = ContinuousTreatmentOrthoForest(
         n_trees=50, min_leaf_size=10, max_depth=50,
         subsample_ratio=0.30, bootstrap=False, n_jobs=4,
         model_T=MultiOutputRegressor(Lasso(alpha=0.024)),
         model_Y=Lasso(alpha=0.024),
         model_T_final=WeightedLassoCVWrapper(),
         model_Y_final=WeightedLassoCVWrapper())
     est.fit(Y, T, data.X, data.W, inference="blb")

     # Stack the two expected effect curves so each row holds one test point.
     stacked_expected = np.array([data.expected_exp_te, data.expected_const_te])
     expected_te = stacked_expected.T
     self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
     self._test_ci(est, expected_te, tol=2.0, treatment_type='multi')
 def test_binary_treatments(self):
     """Check DiscreteTreatmentOrthoForest on a binary treatment.

     Covers input validation (lists, mismatched lengths, column-vector inputs,
     two-column treatments, non-numeric treatments), the output shape of
     ``const_marginal_effect``, and the accuracy of estimates and
     ``inference="blb"`` intervals with and without controls W.
     """
     data = TestOrthoForest
     np.random.seed(123)
     # Simulate a binary treatment whose log-odds depend on the support columns of W.
     log_odds = np.dot(data.W[:, data.support], data.coefs_T) + data.eta_sample(data.n)
     propensity = 1 / (1 + np.exp(-log_odds))
     T = np.array([np.random.binomial(1, p) for p in propensity])
     TE = np.array([self._exp_te(x) for x in data.X])
     baseline = np.dot(data.W[:, data.support], data.coefs_Y)
     Y = baseline + T * TE + data.epsilon_sample(data.n)

     # Default-parameter estimator. n_jobs=1 because code coverage
     # does not work well with parallelism.
     est = DiscreteTreatmentOrthoForest(
         n_trees=10,
         n_jobs=1,
         propensity_model=LogisticRegression(),
         model_Y=Lasso(),
         propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
         model_Y_final=WeightedLassoCVWrapper())

     # Input handling: plain Python lists must be accepted.
     est.fit(list(Y), list(T), list(data.X), list(data.W))
     # Inputs whose lengths disagree must raise a ValueError.
     half = data.n // 2
     Y_half, T_half = Y[:half], T[:half]
     with self.assertRaises(ValueError):
         est.fit(Y_half, T_half, data.X, data.W)
     # T and Y given as (n, 1) columns must be accepted.
     est.fit(Y.reshape(-1, 1), T.reshape(-1, 1), data.X, data.W)
     # A treatment array of shape (n, 2) must raise a ValueError.
     two_column_T = np.ones((data.n, 2))
     with self.assertRaises(ValueError):
         est.fit(Y, two_column_T, data.X, data.W)
     # Non-numeric treatments must raise a ValueError.
     string_T = np.array(["a"] * data.n)
     with self.assertRaises(ValueError):
         est.fit(Y, string_T, data.X, data.W)

     # One output row and a single effect column per test point.
     out_te = est.const_marginal_effect(data.x_test)
     self.assertSequenceEqual((data.x_test.shape[0], 1), out_te.shape)

     # Accuracy of estimates and intervals when controls are supplied.
     est = DiscreteTreatmentOrthoForest(
         n_trees=100, min_leaf_size=10, max_depth=30,
         subsample_ratio=0.30, bootstrap=False, n_jobs=4,
         propensity_model=LogisticRegression(C=1 / 0.024, penalty='l1', solver='saga'),
         model_Y=Lasso(alpha=0.024),
         propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
         model_Y_final=WeightedLassoCVWrapper())
     est.fit(Y, T, data.X, data.W, inference="blb")
     self._test_te(est, data.expected_exp_te, tol=0.7, treatment_type='discrete')
     self._test_ci(est, data.expected_exp_te, tol=1.5, treatment_type='discrete')

     # Same estimator refit on data generated without any controls.
     log_odds = data.eta_sample(data.n)
     propensity = 1 / (1 + np.exp(-log_odds))
     T = np.array([np.random.binomial(1, p) for p in propensity])
     Y = T * TE + data.epsilon_sample(data.n)
     est.fit(Y, T, data.X, inference="blb")
     self._test_te(est, data.expected_exp_te, tol=0.5, treatment_type='discrete')
     self._test_ci(est, data.expected_exp_te, tol=1.5, treatment_type='discrete')
 def __init__(self,
              model_y=WeightedLassoCVWrapper(), model_t='auto',
              alpha='auto',
              max_iter=1000,
              tol=1e-4,
              featurizer=None,
              fit_cate_intercept=True,
              linear_first_stages=True,
              discrete_treatment=False,
              n_splits=2,
              random_state=None):
     """Initialize the estimator with a ``MultiOutputDebiasedLasso`` final stage.

     ``alpha``, ``max_iter`` and ``tol`` configure the final-stage
     ``MultiOutputDebiasedLasso`` (always built with ``fit_intercept=False``);
     every other argument is forwarded unchanged to the parent initializer.

     NOTE(review): the default ``model_y`` instance is created once, when this
     function is defined, and is shared by every call that omits ``model_y``.
     """
     final_stage_options = {
         'alpha': alpha,
         'fit_intercept': False,
         'max_iter': max_iter,
         'tol': tol,
     }
     debiased_lasso = MultiOutputDebiasedLasso(**final_stage_options)
     super().__init__(
         model_y=model_y, model_t=model_t, model_final=debiased_lasso,
         featurizer=featurizer, fit_cate_intercept=fit_cate_intercept,
         linear_first_stages=linear_first_stages,
         discrete_treatment=discrete_treatment,
         n_splits=n_splits, random_state=random_state)
    def __init__(self,
                 model_y, model_t, model_final,
                 featurizer=None,
                 fit_cate_intercept=True,
                 linear_first_stages=False,
                 discrete_treatment=False,
                 n_splits=2,
                 random_state=None):
        """Wrap the user-supplied models and hand them to the parent initializer.

        When ``model_t`` is the string ``'auto'`` it is replaced by a
        ``LogisticRegressionCV`` (with ``WeightedStratifiedKFold`` folds) for a
        discrete treatment, or by a ``WeightedLassoCVWrapper`` otherwise. The
        first-stage models are wrapped in ``_FirstStageWrapper`` and the final
        model in ``_FinalWrapper`` before being passed to the parent.
        """
        # TODO: consider whether we need more care around stateful featurizers,
        #       since we clone it and fit separate copies
        if model_t == 'auto':
            model_t = (LogisticRegressionCV(cv=WeightedStratifiedKFold())
                       if discrete_treatment
                       else WeightedLassoCVWrapper())

        self.bias_part_of_coef = fit_cate_intercept
        self.fit_cate_intercept = fit_cate_intercept

        # The second positional argument distinguishes the two first-stage wrappers
        # (True for the outcome model, False for the treatment model).
        outcome_stage = _FirstStageWrapper(model_y, True,
                                           featurizer, linear_first_stages, discrete_treatment)
        treatment_stage = _FirstStageWrapper(model_t, False,
                                             featurizer, linear_first_stages, discrete_treatment)
        # NOTE(review): meaning of the trailing False is defined by _FinalWrapper, not visible here.
        final_stage = _FinalWrapper(model_final, fit_cate_intercept, featurizer, False)

        super().__init__(model_y=outcome_stage,
                         model_t=treatment_stage,
                         model_final=final_stage,
                         discrete_treatment=discrete_treatment,
                         n_splits=n_splits,
                         random_state=random_state)
예제 #6
0
파일: test_orf.py 프로젝트: liwusen/EconML
 def test_continuous_treatments(self):
     """Check ContinuousTreatmentOrthoForest on a single continuous treatment.

     Covers input handling (plain lists, mismatched lengths), the output shape
     of ``const_marginal_effect``, and the accuracy of the effect estimates
     with and without controls W.
     """
     data = TestOrthoForest
     np.random.seed(123)
     # Simulate a continuous treatment and an outcome that both depend on
     # the support columns of W.
     T = np.dot(data.W[:, data.support], data.coefs_T) + data.eta_sample(data.n)
     TE = np.array([self._exp_te(x) for x in data.X])
     baseline = np.dot(data.W[:, data.support], data.coefs_Y)
     Y = baseline + T * TE + data.epsilon_sample(data.n)

     # Estimator with mostly default parameters.
     est = ContinuousTreatmentOrthoForest(
         n_jobs=4, n_trees=10,
         model_T=Lasso(), model_Y=Lasso(),
         model_T_final=WeightedLassoCVWrapper(),
         model_Y_final=WeightedLassoCVWrapper())

     # Input handling: plain Python lists must be accepted ...
     est.fit(list(Y), list(T), list(data.X), list(data.W))
     # ... while inputs whose lengths disagree must raise a ValueError.
     half = data.n // 2
     Y_half, T_half = Y[:half], T[:half]
     with self.assertRaises(ValueError):
         est.fit(Y_half, T_half, data.X, data.W)

     # One output row and a single effect column per test point.
     out_te = est.const_marginal_effect(data.x_test)
     self.assertSequenceEqual((data.x_test.shape[0], 1), out_te.shape)

     # Accuracy of the estimates when controls are supplied.
     est = ContinuousTreatmentOrthoForest(
         n_trees=50, min_leaf_size=10, max_depth=50,
         subsample_ratio=0.30, bootstrap=False, n_jobs=4,
         model_T=Lasso(alpha=0.024), model_Y=Lasso(alpha=0.024),
         model_T_final=WeightedLassoCVWrapper(),
         model_Y_final=WeightedLassoCVWrapper())
     est.fit(Y, T, data.X, data.W)
     self._test_te(est, data.expected_exp_te, tol=0.5)

     # Same estimator refit on data generated without any controls.
     T = data.eta_sample(data.n)
     Y = T * TE + data.epsilon_sample(data.n)
     est.fit(Y, T, data.X)
     self._test_te(est, data.expected_exp_te, tol=0.5)
 def __init__(self,
              model_y=WeightedLassoCVWrapper(), model_t='auto',
              featurizer=None,
              fit_cate_intercept=True,
              linear_first_stages=True,
              discrete_treatment=False,
              n_splits=2,
              random_state=None):
     """Initialize the estimator with a ``StatsModelsLinearRegression`` final stage.

     The final model is always ``StatsModelsLinearRegression(fit_intercept=False)``;
     every argument is forwarded unchanged to the parent initializer.

     NOTE(review): the default ``model_y`` instance is created once, when this
     function is defined, and is shared by every call that omits ``model_y``.
     """
     linear_final_stage = StatsModelsLinearRegression(fit_intercept=False)
     super().__init__(
         model_y=model_y, model_t=model_t, model_final=linear_final_stage,
         featurizer=featurizer, fit_cate_intercept=fit_cate_intercept,
         linear_first_stages=linear_first_stages,
         discrete_treatment=discrete_treatment,
         n_splits=n_splits, random_state=random_state)
    def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto', fit_cate_intercept=True,
                 dim=20, bw=1.0, discrete_treatment=False, n_splits=2, random_state=None):
        """Initialize the estimator with a random cosine featurizer and an ``ElasticNetCV`` final stage.

        ``dim`` is the number of random features generated from X and ``bw``
        sets the scale (``1 / bw``) of the normal draws used for the random
        projection. The remaining arguments are forwarded to the parent
        initializer.
        """
        class RandomFeatures(TransformerMixin):
            """Featurizer mapping X to ``dim`` randomly projected cosine features."""

            def __init__(self, random_state):
                self._random_state = check_random_state(random_state)

            def fit(self, X):
                # One random direction per output feature (normal, scale 1 / bw) ...
                n_inputs = shape(X)[1]
                self.omegas = self._random_state.normal(0, 1 / bw, size=(n_inputs, dim))
                # ... and one phase offset per output feature, uniform on [0, 2*pi).
                self.biases = self._random_state.uniform(0, 2 * np.pi, size=(1, dim))
                return self

            def transform(self, X):
                projected = np.matmul(X, self.omegas) + self.biases
                return np.sqrt(2 / dim) * np.cos(projected)

        final_stage = ElasticNetCV(fit_intercept=False)
        random_featurizer = RandomFeatures(random_state)
        super().__init__(model_y=model_y,
                         model_t=model_t,
                         model_final=final_stage,
                         featurizer=random_featurizer,
                         fit_cate_intercept=fit_cate_intercept,
                         discrete_treatment=discrete_treatment,
                         n_splits=n_splits,
                         random_state=random_state)
예제 #9
0
    def test_wrapper_attributes(self):
        """Check that wrapper attributes survive fits that alternate between 1-D and 2-D targets."""
        wrapper = WeightedLassoCVWrapper(alphas=[5, 10], max_iter=100)
        wrapper.tol = 0.01  # an attribute assigned after construction must be kept as well

        def check_attributes():
            assert wrapper.alphas == [5, 10]
            assert wrapper.max_iter == 100
            assert wrapper.tol == 0.01

        # Attributes are as configured before any fit.
        check_attributes()

        # Fit on a 1-D target first, then on a 2-D target, re-checking after each fit.
        for target_shape in (100, (100, 2)):
            features = np.random.normal(size=(100, 3))
            targets = np.random.normal(size=target_shape)
            wrapper.fit(features, targets)
            check_attributes()
예제 #10
0
파일: dml.py 프로젝트: liwusen/EconML
    def __init__(self,
                 model_y, model_t, model_final,
                 featurizer=None,
                 fit_cate_intercept=True,
                 linear_first_stages=False,
                 discrete_treatment=False,
                 n_splits=2,
                 random_state=None):
        """Wrap the user-supplied models and hand them to the parent initializer.

        Parameters
        ----------
        model_y
            Model for the outcome first stage; cloned before use.
        model_t
            Model for the treatment first stage, or the string ``'auto'``.
            ``'auto'`` selects ``LogisticRegressionCV`` with
            ``WeightedStratifiedKFold`` folds when ``discrete_treatment`` is
            true and ``WeightedLassoCVWrapper`` otherwise.
        model_final
            Model fit in the final stage on the cross product of the
            (featurized) X and the treatment residuals; cloned before use.
        featurizer
            Optional transformer applied to X; cloned separately for each wrapper.
        fit_cate_intercept
            When true, a column of ones is prepended to the final-stage features.
        linear_first_stages
            When true, the outcome first stage is fit on the cross product of
            [X, W] with [1, featurized X] instead of on [X, W] directly.
        discrete_treatment, n_splits, random_state
            Forwarded to the parent initializer; ``discrete_treatment`` also
            switches the treatment first stage to classifier handling.
        """

        # TODO: consider whether we need more care around stateful featurizers,
        #       since we clone it and fit separate copies

        if model_t == 'auto':
            if discrete_treatment:
                model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
            else:
                model_t = WeightedLassoCVWrapper()

        class FirstStageWrapper:
            """Adapter that fits and predicts a first-stage model from X and W."""

            def __init__(self, model, is_Y):
                self._model = clone(model, safe=False)
                self._featurizer = clone(featurizer, safe=False)
                self._is_Y = is_Y

            def _combine(self, X, W, n_samples, fitting=True):
                """Build the design matrix for the wrapped model from X and W."""
                if X is None:
                    # if both X and W are None, just return a column of ones
                    return (W if W is not None else np.ones((n_samples, 1)))
                XW = hstack([X, W]) if W is not None else X
                if self._is_Y and linear_first_stages:
                    if self._featurizer is None:
                        F = X
                    else:
                        # The featurizer is refit only while fitting; prediction reuses the fitted one.
                        F = self._featurizer.fit_transform(X) if fitting else self._featurizer.transform(X)
                    return cross_product(XW, hstack([np.ones((shape(XW)[0], 1)), F]))
                else:
                    return XW

            def fit(self, X, W, Target, sample_weight=None):
                """Fit the wrapped model; raises AttributeError if a treatment is missing from the fold."""
                if (not self._is_Y) and discrete_treatment:
                    # In this case, the Target is the one-hot-encoding of the treatment variable
                    # We need to go back to the label representation of the one-hot so as to call
                    # the classifier.
                    # Reject folds with an all-zero column or with no all-zero row.
                    if np.any(np.all(Target == 0, axis=0)) or (not np.any(np.all(Target == 0, axis=1))):
                        raise AttributeError("Provided crossfit folds contain training splits that " +
                                             "don't contain all treatments")
                    Target = inverse_onehot(Target)

                # sample_weight is only passed on when given, so models without that argument still work.
                if sample_weight is not None:
                    self._model.fit(self._combine(X, W, Target.shape[0]), Target, sample_weight=sample_weight)
                else:
                    self._model.fit(self._combine(X, W, Target.shape[0]), Target)

            def predict(self, X, W):
                """Predict with the wrapped model; class probabilities for a discrete treatment."""
                n_samples = X.shape[0] if X is not None else (W.shape[0] if W is not None else 1)
                if (not self._is_Y) and discrete_treatment:
                    # Drop the first probability column to match the one-hot targets used in fit.
                    return self._model.predict_proba(self._combine(X, W, n_samples, fitting=False))[:, 1:]
                else:
                    return self._model.predict(self._combine(X, W, n_samples, fitting=False))

        class FinalWrapper:
            """Adapter that fits the final model on the cross product of features and treatment residuals."""

            def __init__(self):
                self._model = clone(model_final, safe=False)
                self._original_featurizer = clone(featurizer, safe=False)
                self._fit_cate_intercept = fit_cate_intercept
                if self._fit_cate_intercept:
                    add_intercept = FunctionTransformer(lambda F:
                                                        hstack([np.ones((F.shape[0], 1)), F]))
                    # Compare against None rather than truth-testing the estimator: objects that
                    # define __len__ (e.g. an unfitted ensemble transformer) can raise or be falsy.
                    if self._original_featurizer is not None:
                        self._featurizer = Pipeline([('featurize', self._original_featurizer),
                                                     ('add_intercept', add_intercept)])
                    else:
                        self._featurizer = add_intercept
                else:
                    self._featurizer = self._original_featurizer

            def _combine(self, X, T, fitting=True):
                """Return the cross product of the (featurized) X with T."""
                if X is not None:
                    if self._featurizer is not None:
                        F = self._featurizer.fit_transform(X) if fitting else self._featurizer.transform(X)
                    else:
                        F = X
                else:
                    if not self._fit_cate_intercept:
                        raise AttributeError("Cannot have X=None and also not allow for a CATE intercept!")
                    # Without X the only feature is the intercept column.
                    F = np.ones((T.shape[0], 1))
                return cross_product(F, T)

            def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None):
                """Fit the final model on the residuals and record any intercept it learned."""
                # Track training dimensions to see if Y or T is a vector instead of a 2-dimensional array
                self._d_t = shape(T_res)[1:]
                self._d_y = shape(Y_res)[1:]
                fts = self._combine(X, T_res)
                # sample_var is only forwarded together with sample_weight.
                if sample_weight is not None:
                    if sample_var is not None:
                        self._model.fit(fts,
                                        Y_res, sample_weight=sample_weight, sample_var=sample_var)
                    else:
                        self._model.fit(fts,
                                        Y_res, sample_weight=sample_weight)
                else:
                    self._model.fit(fts, Y_res)

                # The prediction at an all-zero feature row is the model's intercept.
                self._intercept = None
                intercept = self._model.predict(np.zeros_like(fts[0:1]))
                if (np.count_nonzero(intercept) > 0):
                    warn("The final model has a nonzero intercept for at least one outcome; "
                         "it will be subtracted, but consider fitting a model without an intercept if possible.",
                         UserWarning)
                    self._intercept = intercept

            def predict(self, X):
                """Predict effects for unit treatments at X, with any learned intercept removed."""
                X2, T = broadcast_unit_treatments(X if X is not None else np.empty((1, 0)),
                                                  self._d_t[0] if self._d_t else 1)
                prediction = self._model.predict(self._combine(None if X is None else X2, T, fitting=False))
                if self._intercept is not None:
                    prediction -= self._intercept
                return reshape_treatmentwise_effects(prediction,
                                                     self._d_t, self._d_y)
        self.bias_part_of_coef = fit_cate_intercept
        self.fit_cate_intercept = fit_cate_intercept
        super().__init__(model_y=FirstStageWrapper(model_y, is_Y=True),
                         model_t=FirstStageWrapper(model_t, is_Y=False),
                         model_final=FinalWrapper(),
                         discrete_treatment=discrete_treatment,
                         n_splits=n_splits,
                         random_state=random_state)
예제 #11
0
# NOTE(review): n_trees, min_leaf_size, max_depth, Y, T, X and W are used below but are
# not defined in this excerpt; they must be set earlier in the script.
subsample_ratio = 0.04

#%%
# Definition of range of variable tested for heterogeneity
min_tfsum = 0.0
max_tfsum = 24.0
# Step that splits [min_tfsum, max_tfsum] into 100 equal intervals
delta = (max_tfsum - min_tfsum) / 100
# The stop value is pushed just past max_tfsum so the upper endpoint is included in the grid;
# the result is reshaped into a single-column 2-D array.
X_test = np.arange(min_tfsum, max_tfsum + delta - 0.001, delta).reshape(-1, 1)

#%%
# Estimation of causal tree
est = CausalForest(n_trees=n_trees,
                   min_leaf_size=min_leaf_size,
                   max_depth=max_depth,
                   subsample_ratio=subsample_ratio,
                   model_T=WeightedLassoCVWrapper(cv=3),
                   model_Y=WeightedLassoCVWrapper(cv=3),
                   random_state=123)
est.fit(Y, T, X=X, W=W)
# Point estimates and interval bounds evaluated on the test grid
treatment_effects = est.effect(X_test)
te_lower, te_upper = est.effect_interval(X_test)

#%%
# Plot results
plt.figure(figsize=(15, 5))
plt.plot(X_test.flatten(), treatment_effects)
# Shaded band between the lower and upper interval bounds.
# NOTE(review): the "90% CI" label presumably matches effect_interval's default level - confirm.
plt.fill_between(X_test.flatten(),
                 te_lower,
                 te_upper,
                 label="90% CI",
                 alpha=0.3)