Exemple #1
0
 def test_drlearner_clipping(self):
     X = np.linspace(0, 1, 200).reshape(-1, 1)
     T = np.random.binomial(1, X)
     Y = np.random.normal(size=T.shape)
     X[0] = -1000  # one split will have only X values between 0 and 1,
     # so the predicted propensity for this point will be extremely low
     learner = DRLearner()
     learner.fit(Y, T, X)
     effect = learner.const_marginal_effect(np.array([[0.5]]))
     assert not(np.any(np.isnan(effect)))
Exemple #2
0
    def test_drlearner_all_attributes(self):
        from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
        from sklearn.linear_model import LinearRegression, LogisticRegression
        from econml.utilities import StatsModelsLinearRegression
        import scipy.special
        np.random.seed(123)
        controls = np.random.uniform(-1, 1, size=(5000, 3))
        T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
        sigma = 0.01
        y = (1 + .5 * controls[:, 0]) * T + controls[:,
                                                     0] + np.random.normal(0, sigma, size=(5000,))
        for X in [controls]:
            for W in [None, controls]:
                for sample_weight in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                    for sample_var in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                        for featurizer in [None, PolynomialFeatures(degree=2, include_bias=False)]:
                            for models in [(GradientBoostingClassifier(), GradientBoostingRegressor(),
                                            RandomForestRegressor(n_estimators=100,
                                                                  max_depth=5, min_samples_leaf=50)),
                                           (GradientBoostingClassifier(), GradientBoostingRegressor(),
                                            RandomForestRegressor(n_estimators=100,
                                                                  max_depth=5, min_samples_leaf=50)),
                                           (LogisticRegression(solver='lbfgs', multi_class='auto'),
                                            LinearRegression(), StatsModelsLinearRegression())]:
                                for multitask_model_final in [False, True]:
                                    if (not isinstance(models, StatsModelsLinearRegression))\
                                            and (sample_var is not None):
                                        continue
                                    with self.subTest(X=X, W=W, sample_weight=sample_weight, sample_var=sample_var,
                                                      featurizer=featurizer, models=models,
                                                      multitask_model_final=multitask_model_final):
                                        est = DRLearner(model_propensity=models[0],
                                                        model_regression=models[1],
                                                        model_final=models[2],
                                                        featurizer=featurizer,
                                                        multitask_model_final=multitask_model_final)
                                        if (X is None) and (W is None):
                                            with pytest.raises(AttributeError) as e_info:
                                                est.fit(y, T, X=X, W=W,
                                                        sample_weight=sample_weight, sample_var=sample_var)
                                            continue
                                        est.fit(
                                            y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var)
                                        np.testing.assert_allclose(est.effect(X[:3], T0=0, T1=1), 1 + .5 * X[:3, 0],
                                                                   rtol=0, atol=.15)
                                        np.testing.assert_allclose(est.const_marginal_effect(X[:3]),
                                                                   np.hstack(
                                                                       [1 + .5 * X[:3, [0]],
                                                                        2 * (1 + .5 * X[:3, [0]])]),
                                                                   rtol=0, atol=.15)
                                        for t in [1, 2]:
                                            np.testing.assert_allclose(est.marginal_effect(t, X[:3]),
                                                                       np.hstack([1 + .5 * X[:3, [0]],
                                                                                  2 * (1 + .5 * X[:3, [0]])]),
                                                                       rtol=0, atol=.15)
                                        assert isinstance(est.score_, float)
                                        assert isinstance(
                                            est.score(y, T, X=X, W=W), float)

                                        feat_names = ['A', 'B', 'C']
                                        out_feat_names = feat_names
                                        if featurizer is not None:
                                            out_feat_names = featurizer.fit(
                                                X).get_feature_names(feat_names)
                                            np.testing.assert_array_equal(
                                                est.featurizer.n_input_features_, 3)
                                        np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                      out_feat_names)

                                        if isinstance(models[0], GradientBoostingClassifier):
                                            np.testing.assert_array_equal(np.array([mdl.feature_importances_
                                                                                    for mdl
                                                                                    in est.models_regression]).shape,
                                                                          [2, 2 + X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                            np.testing.assert_array_equal(np.array([mdl.feature_importances_
                                                                                    for mdl
                                                                                    in est.models_propensity]).shape,
                                                                          [2, X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                        else:
                                            np.testing.assert_array_equal(np.array([mdl.coef_
                                                                                    for mdl
                                                                                    in est.models_regression]).shape,
                                                                          [2, 2 + X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                            np.testing.assert_array_equal(np.array([mdl.coef_
                                                                                    for mdl
                                                                                    in est.models_propensity]).shape,
                                                                          [2, 3, X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                        if multitask_model_final:
                                            if isinstance(models[2], RandomForestRegressor):
                                                np.testing.assert_equal(np.argsort(
                                                    est.multitask_model_cate.feature_importances_)[-1], 0)
                                            else:
                                                true_coef = np.zeros(
                                                    (2, len(out_feat_names)))
                                                true_coef[:, 0] = [.5, 1]
                                                np.testing.assert_allclose(
                                                    est.multitask_model_cate.coef_, true_coef, rtol=0, atol=.15)
                                                np.testing.assert_allclose(
                                                    est.multitask_model_cate.intercept_, [1, 2], rtol=0, atol=.15)
                                        else:
                                            for t in [1, 2]:
                                                if isinstance(models[2], RandomForestRegressor):
                                                    np.testing.assert_equal(np.argsort(
                                                        est.model_cate(T=t).feature_importances_)[-1], 0)
                                                else:
                                                    true_coef = np.zeros(
                                                        len(out_feat_names))
                                                    true_coef[0] = .5 * t
                                                    np.testing.assert_allclose(
                                                        est.model_cate(T=t).coef_, true_coef, rtol=0, atol=.15)
                                                    np.testing.assert_allclose(
                                                        est.model_cate(T=t).intercept_, t, rtol=0, atol=.15)