Example #1
0
    def test_nonparam_dml(self):
        """Exercise ``refit_final`` on a NonParamDML estimator while its settings change.

        The estimator is fit, its final model and featurizer are swapped and
        refit, and finally it is reconfigured for a discrete treatment and
        compared against a freshly constructed estimator with the same seed.
        """
        outcome, treatment, features, controls = self._get_data()
        probe = features[:1]  # one-row query reused by every effect call below

        est = NonParamDML(model_y=LinearRegression(),
                          model_t=LinearRegression(),
                          model_final=WeightedLasso(),
                          random_state=123)

        # After a plain fit, refit_final is expected to raise ...
        est.fit(outcome, treatment, X=features, W=controls)
        with pytest.raises(Exception):
            est.refit_final()

        # ... whereas after fitting with cache_values=True it must succeed.
        est.fit(outcome, treatment, X=features, W=controls, cache_values=True)

        # Swap the final model and refit.
        est.model_final = DebiasedLasso(fit_intercept=False)
        est.refit_final()
        assert isinstance(est.model_cate, DebiasedLasso)
        est.effect_interval(probe)

        # Swap the featurizer and refit.
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        assert isinstance(est.featurizer_, PolynomialFeatures)
        est.effect_interval(probe)

        # Reconfigure the same object for a discrete treatment and fit again.
        est.discrete_treatment = True
        est.featurizer = None
        est.linear_first_stages = True
        est.model_t = LogisticRegression()
        est.model_final = DebiasedLasso()
        est.fit(outcome, treatment, X=features, W=controls)

        # A freshly built estimator with the same seed must give identical results.
        fresh = NonParamDML(model_y=LinearRegression(),
                            model_t=LogisticRegression(),
                            model_final=DebiasedLasso(),
                            discrete_treatment=True,
                            random_state=123)
        reference = fresh.fit(outcome, treatment, X=features, W=controls)

        np.testing.assert_array_equal(est.effect(probe), reference.effect(probe))
        # Only the first element of each interval is compared.
        first_bound_est = est.effect_interval(probe)[0]
        first_bound_ref = reference.effect_interval(probe)[0]
        np.testing.assert_array_equal(first_bound_est, first_bound_ref)
Example #2
0
    def test_inference_with_none_stderr(self):
        """Smoke-test the inference summaries for estimators with plain final models.

        Each estimator is fit on the shared ``TestInference`` data and every
        summary method is called; the test passes as long as none of the
        calls raises.
        """
        outcome = TestInference.Y
        treatment = TestInference.T
        features = TestInference.X
        controls = TestInference.W

        def exercise_effect_summaries(fitted):
            # The four effect-level inference calls shared by all estimators here.
            fitted.effect_inference(features).summary_frame()
            fitted.effect_inference(features).population_summary()
            fitted.const_marginal_effect_inference(features).summary_frame()
            fitted.marginal_effect_inference(treatment, features).summary_frame()

        # DML with a Lasso final stage: also exercise the coefficient summaries.
        linear_dml = DML(model_y=LinearRegression(),
                         model_t=LinearRegression(),
                         model_final=Lasso(alpha=0.1, fit_intercept=False),
                         featurizer=PolynomialFeatures(degree=1, include_bias=False),
                         random_state=123)
        linear_dml.fit(outcome, treatment, X=features, W=controls)
        linear_dml.summary()
        linear_dml.coef__inference().summary_frame()
        linear_dml.intercept__inference().summary_frame()
        exercise_effect_summaries(linear_dml)

        # NonParamDML with an ordinary LinearRegression final stage.
        nonparam_dml = NonParamDML(model_y=LinearRegression(),
                                   model_t=LinearRegression(),
                                   model_final=LinearRegression(fit_intercept=False),
                                   featurizer=PolynomialFeatures(degree=1, include_bias=False),
                                   random_state=123)
        nonparam_dml.fit(outcome, treatment, X=features, W=controls)
        exercise_effect_summaries(nonparam_dml)

        # DRLearner with an ordinary LinearRegression final stage.
        dr_learner = DRLearner(model_regression=LinearRegression(),
                               model_propensity=LogisticRegression(),
                               model_final=LinearRegression())
        dr_learner.fit(outcome, treatment, X=features, W=controls)
        exercise_effect_summaries(dr_learner)
Example #3
0
    def test_auto_inference(self):
        """Check the inference results of estimators fit without an explicit inference option.

        For the DML and NonParamDML cases the ``stderr`` of every inference
        result must be populated; for a DRLearner with
        ``multitask_model_final=True`` requesting effect inference must raise
        ``AttributeError``.
        """
        outcome = TestInference.Y
        treatment = TestInference.T
        features = TestInference.X
        controls = TestInference.W

        def exercise_effect_inference(fitted, expect_stderr):
            # Run the effect-level inference calls; optionally require a stderr on each.
            fitted.effect_inference(features).summary_frame()
            if expect_stderr:
                assert fitted.effect_inference(features).stderr is not None
            fitted.effect_inference(features).population_summary()
            fitted.const_marginal_effect_inference(features).summary_frame()
            if expect_stderr:
                assert fitted.const_marginal_effect_inference(features).stderr is not None
            fitted.marginal_effect_inference(treatment, features).summary_frame()
            if expect_stderr:
                assert fitted.marginal_effect_inference(treatment, features).stderr is not None

        # DRLearner with a StatsModels final stage: summaries only, no stderr checks.
        dr_statsmodels = DRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   model_final=StatsModelsLinearRegression())
        dr_statsmodels.fit(outcome, treatment, X=features, W=controls)
        exercise_effect_inference(dr_statsmodels, expect_stderr=False)

        # Multitask final model: effect inference is expected to be unavailable.
        dr_multitask = DRLearner(model_regression=LinearRegression(),
                                 model_propensity=LogisticRegression(),
                                 model_final=LinearRegression(),
                                 multitask_model_final=True)
        dr_multitask.fit(outcome, treatment, X=features, W=controls)
        with pytest.raises(AttributeError):
            dr_multitask.effect_inference(features)

        # DML with a StatsModels final stage: coefficient and effect stderr must exist.
        dml_statsmodels = DML(model_y=LinearRegression(),
                              model_t=LinearRegression(),
                              model_final=StatsModelsLinearRegression(fit_intercept=False),
                              random_state=123)
        dml_statsmodels.fit(outcome, treatment, X=features, W=controls)
        dml_statsmodels.summary()
        dml_statsmodels.coef__inference().summary_frame()
        assert dml_statsmodels.coef__inference().stderr is not None
        dml_statsmodels.intercept__inference().summary_frame()
        assert dml_statsmodels.intercept__inference().stderr is not None
        exercise_effect_inference(dml_statsmodels, expect_stderr=True)

        # NonParamDML with a DebiasedLasso final stage: effect stderr must exist.
        nonparam_debiased = NonParamDML(model_y=LinearRegression(),
                                        model_t=LinearRegression(),
                                        model_final=DebiasedLasso(),
                                        random_state=123)
        nonparam_debiased.fit(outcome, treatment, X=features, W=controls)
        exercise_effect_inference(nonparam_debiased, expect_stderr=True)
Example #4
0
 def test_dml_random_state(self):
     """Run the shared random-state check against each DML-family estimator.

     Every estimator is built with ``random_state=123`` and two
     cross-fitting folds, then handed to
     ``TestRandomState._test_random_state`` together with the generated data.

     All estimators use the ``cv`` keyword for the number of folds, matching
     ``CausalForestDML`` in this same list; ``n_splits`` is the deprecated
     spelling of that argument.
     """
     def outcome_model():
         # Fresh first-stage outcome regressor for each estimator.
         return RandomForestRegressor(n_estimators=10,
                                      max_depth=4,
                                      random_state=123)

     def treatment_model():
         # Fresh first-stage treatment classifier for each estimator.
         return RandomForestClassifier(n_estimators=10,
                                       max_depth=4,
                                       random_state=123)

     Y, T, X, W, X_test = TestRandomState._make_data(500, 2)
     estimators = [
         NonParamDML(model_y=outcome_model(),
                     model_t=treatment_model(),
                     model_final=RandomForestRegressor(
                         max_depth=3,
                         n_estimators=10,
                         min_samples_leaf=100,
                         bootstrap=True,
                         random_state=123),
                     discrete_treatment=True,
                     cv=2,
                     random_state=123),
         CausalForestDML(model_y=outcome_model(),
                         model_t=treatment_model(),
                         n_estimators=8,
                         discrete_treatment=True,
                         cv=2,
                         random_state=123),
         LinearDML(model_y=outcome_model(),
                   model_t=treatment_model(),
                   discrete_treatment=True,
                   cv=2,
                   random_state=123),
         SparseLinearDML(discrete_treatment=True,
                         cv=2,
                         random_state=123),
         KernelDML(discrete_treatment=True,
                   cv=2,
                   random_state=123),
     ]
     for est in estimators:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
Example #5
0
def dml(outcome, treatment, data, method='GBR'):
    """Estimate the average treatment effect (T0=0 vs. T1=1) with double ML.

    Every column of ``data`` other than ``outcome`` and ``treatment`` is
    used both as the effect-modifier set ``X`` and as the control set ``W``.

    Parameters
    ----------
    outcome : label of the outcome column in ``data``.
    treatment : label of the treatment column in ``data``; the estimators
        are configured with ``discrete_treatment=True``.
    data : table supporting ``data[label]`` and ``data.drop(columns=...)``
        (presumably a pandas DataFrame -- confirm against callers).
    method : ``'GBR'`` for a NonParamDML with gradient-boosting models, or
        ``'linear'`` for a LinearDML with its default models.

    Returns
    -------
    The value of ``est.ate(...)`` evaluated on the covariates.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.  (Previously this
        case fell through to ``return point`` with ``point`` unbound.)
    """
    if method == 'GBR':
        est = NonParamDML(model_y=GradientBoostingRegressor(),
                          model_t=GradientBoostingClassifier(),
                          model_final=GradientBoostingRegressor(),
                          discrete_treatment=True)
    elif method == 'linear':
        est = LinearDML(discrete_treatment=True)
    else:
        raise ValueError(
            f"Unknown method {method!r}; expected 'GBR' or 'linear'.")

    # Same covariate table serves as X, W and the evaluation points.
    covariates = data.drop(columns=[outcome, treatment])
    est.fit(data[outcome], data[treatment], X=covariates, W=covariates)
    return est.ate(covariates, T0=0, T1=1)
Example #6
0
    def test_comparison(self):
        """Check that RScorer rankings track the true CATE error of several estimators.

        A set of estimators is fit in parallel on a training split and scored
        with an ``RScorer`` fit on the validation split.  The test asserts
        that the slope of root-PEHE regressed on the R-score is below 0.5, and
        that both the scorer's best model and its ensemble are within 20% of
        the lowest root-PEHE among the candidates.
        """
        def make_reg():
            return LinearRegression()

        def make_clf():
            return LogisticRegression()

        def rootpehe(fitted):
            # Root mean squared gap between true and estimated validation effects.
            gap = true_eff_val.flatten() - fitted.effect(X_val).flatten()
            return np.sqrt(np.mean(gap**2))

        y, T, X, true_eff = self._get_data()
        split = train_test_split(X, T, y, true_eff, test_size=.4)
        X_train, X_val, T_train, T_val, Y_train, Y_val, _, true_eff_val = split

        poly2 = PolynomialFeatures(degree=2, include_bias=False)
        candidates = [
            ('ldml', LinearDML(model_y=make_reg(),
                               model_t=make_clf(),
                               discrete_treatment=True,
                               linear_first_stages=False,
                               cv=3)),
            ('sldml', SparseLinearDML(model_y=make_reg(),
                                      model_t=make_clf(),
                                      discrete_treatment=True,
                                      featurizer=poly2,
                                      linear_first_stages=False,
                                      cv=3)),
            ('xlearner', XLearner(models=make_reg(),
                                  cate_models=make_reg(),
                                  propensity_model=make_clf())),
            ('dalearner', DomainAdaptationLearner(models=make_reg(),
                                                  final_models=make_reg(),
                                                  propensity_model=make_clf())),
            ('slearner', SLearner(overall_model=make_reg())),
            ('tlearner', TLearner(models=make_reg())),
            ('drlearner', DRLearner(model_propensity=make_clf(),
                                    model_regression=make_reg(),
                                    model_final=make_reg(),
                                    cv=3)),
            ('rlearner', NonParamDML(model_y=make_reg(),
                                     model_t=make_clf(),
                                     model_final=make_reg(),
                                     discrete_treatment=True,
                                     cv=3)),
            ('dml3dlasso', DML(model_y=make_reg(),
                               model_t=make_clf(),
                               model_final=make_reg(),
                               discrete_treatment=True,
                               featurizer=PolynomialFeatures(degree=3),
                               linear_first_stages=False,
                               cv=3)),
        ]

        # Fit every candidate on the training split using all available workers.
        run_parallel = Parallel(n_jobs=-1, verbose=1)
        fitted = run_parallel(
            delayed(_fit_model)(label, est, Y_train, T_train, X_train)
            for label, est in candidates)
        estimators = [est for _, est in fitted]

        scorer = RScorer(model_y=make_reg(),
                         model_t=make_clf(),
                         discrete_treatment=True,
                         cv=3,
                         mc_iters=2,
                         mc_agg='median')
        scorer.fit(Y_val, T_val, X=X_val)

        rscore = [scorer.score(est) for est in estimators]
        rootpehe_score = [rootpehe(est) for est in estimators]

        # Regress the true error on the R-score and bound the fitted slope.
        trend = LinearRegression().fit(np.array(rscore).reshape(-1, 1),
                                       np.array(rootpehe_score))
        assert trend.coef_ < 0.5

        # The model picked by the scorer must be close to the best candidate.
        best, _ = scorer.best_model(list(estimators))
        rootpehe_best = rootpehe(best)
        assert rootpehe_best < 1.2 * np.min(rootpehe_score)

        # The scorer's ensemble must be close to the best candidate as well.
        ensemble, _ = scorer.ensemble(list(estimators))
        rootpehe_ensemble = rootpehe(ensemble)
        assert rootpehe_ensemble < 1.2 * np.min(rootpehe_score)