Beispiel #1
0
 def test_pickle_inferenceresult(self):
     Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
     est = DML(model_y=LinearRegression(),
               model_t=LinearRegression(),
               model_final=Lasso(alpha=0.1, fit_intercept=False),
               featurizer=PolynomialFeatures(degree=1, include_bias=False),
               random_state=123)
     est.fit(Y, T, X=X, W=W)
     effect_inf = est.effect_inference(X)
     s = pickle.dumps(effect_inf)
Beispiel #2
0
    def test_rlearner_residuals(self):
        y, T, X, W = self._get_data()

        dml = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  cv=1,
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  linear_first_stages=False,
                  random_state=123)
        with pytest.raises(AttributeError):
            y_res, T_res, X_res, W_res = dml.residuals_
        dml.fit(y, T, X=X, W=W)
        with pytest.raises(AttributeError):
            y_res, T_res, X_res, W_res = dml.residuals_
        dml.fit(y, T, X=X, W=W, cache_values=True)
        y_res, T_res, X_res, W_res = dml.residuals_
        np.testing.assert_array_equal(X, X_res)
        np.testing.assert_array_equal(W, W_res)
        XW = np.hstack([X, W])
        np.testing.assert_array_equal(y_res, y - LinearRegression().fit(XW, y).predict(XW))
        np.testing.assert_array_equal(T_res, T - LinearRegression().fit(XW, T).predict(XW))
Beispiel #3
0
    def test_inference_with_none_stderr(self):
        Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
        est = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=Lasso(alpha=0.1, fit_intercept=False),
                  featurizer=PolynomialFeatures(degree=1, include_bias=False),
                  random_state=123)
        est.fit(Y, T, X=X, W=W)
        est.summary()
        est.coef__inference().summary_frame()
        est.intercept__inference().summary_frame()
        est.effect_inference(X).summary_frame()
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        est.marginal_effect_inference(T, X).summary_frame()

        est = NonParamDML(model_y=LinearRegression(),
                          model_t=LinearRegression(),
                          model_final=LinearRegression(fit_intercept=False),
                          featurizer=PolynomialFeatures(degree=1, include_bias=False),
                          random_state=123)
        est.fit(Y, T, X=X, W=W)
        est.effect_inference(X).summary_frame()
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        est.marginal_effect_inference(T, X).summary_frame()

        est = DRLearner(model_regression=LinearRegression(),
                        model_propensity=LogisticRegression(),
                        model_final=LinearRegression())
        est.fit(Y, T, X=X, W=W)
        est.effect_inference(X).summary_frame()
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        est.marginal_effect_inference(T, X).summary_frame()
Beispiel #4
0
    def test_dml(self):
        """Test setting attributes and refitting"""
        y, T, X, W = self._get_data()

        dml = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  linear_first_stages=False,
                  random_state=123)
        dml.fit(y, T, X=X, W=W)
        with pytest.raises(Exception):
            dml.refit_final()
        dml.fit(y, T, X=X, W=W, cache_values=True)
        dml.model_final = StatsModelsRLM(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.model_cate, StatsModelsRLM)
        np.testing.assert_array_equal(dml.model_cate.coef_[1:].flatten(), dml.coef_.flatten())
        lb, ub = dml.model_cate.coef__interval(alpha=0.01)
        lbt, ubt = dml.coef__interval(alpha=0.01)
        np.testing.assert_array_equal(lb[1:].flatten(), lbt.flatten())
        np.testing.assert_array_equal(ub[1:].flatten(), ubt.flatten())
        intcpt = dml.intercept_
        dml.fit_cate_intercept = False
        np.testing.assert_equal(dml.intercept_, intcpt)
        dml.refit_final()
        np.testing.assert_array_equal(dml.model_cate.coef_.flatten(), dml.coef_.flatten())
        lb, ub = dml.model_cate.coef__interval(alpha=0.01)
        lbt, ubt = dml.coef__interval(alpha=0.01)
        np.testing.assert_array_equal(lb.flatten(), lbt.flatten())
        np.testing.assert_array_equal(ub.flatten(), ubt.flatten())
        with pytest.raises(AttributeError):
            dml.intercept_
        with pytest.raises(AttributeError):
            dml.intercept__interval()
        dml.model_final = DebiasedLasso(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.model_cate, DebiasedLasso)
        dml.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        dml.model_final = StatsModelsLinearRegression(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.featurizer_, PolynomialFeatures)
        dml.fit_cate_intercept = True
        dml.refit_final()
        assert isinstance(dml.featurizer_, Pipeline)
        np.testing.assert_array_equal(dml.coef_.shape, (X.shape[1]**2))
        np.testing.assert_array_equal(dml.coef__interval()[0].shape, (X.shape[1]**2))
        coefpre = dml.coef_
        coefpreint = dml.coef__interval()
        dml.fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(coefpre, dml.coef_)
        np.testing.assert_array_equal(coefpreint[0], dml.coef__interval()[0])
        dml.discrete_treatment = True
        dml.featurizer = None
        dml.linear_first_stages = True
        dml.model_t = LogisticRegression()
        dml.fit(y, T, X=X, W=W)
        newdml = DML(model_y=LinearRegression(),
                     model_t=LogisticRegression(),
                     model_final=StatsModelsLinearRegression(fit_intercept=False),
                     discrete_treatment=True,
                     linear_first_stages=True,
                     random_state=123).fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(dml.coef_, newdml.coef_)
        np.testing.assert_array_equal(dml.coef__interval()[0], newdml.coef__interval()[0])

        ldml = LinearDML(model_y=LinearRegression(),
                         model_t=LinearRegression(),
                         linear_first_stages=False)
        ldml.fit(y, T, X=X, W=W, cache_values=True)
        # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
        with pytest.raises(ValueError):
            ldml.model_final = StatsModelsRLM()

        ldml = SparseLinearDML(model_y=LinearRegression(),
                               model_t=LinearRegression(),
                               linear_first_stages=False)
        ldml.fit(y, T, X=X, W=W, cache_values=True)
        # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
        with pytest.raises(ValueError):
            ldml.model_final = StatsModelsRLM()
        ldml.alpha = 0.01
        ldml.max_iter = 10
        ldml.tol = 0.01
        ldml.refit_final()
        np.testing.assert_equal(ldml.model_cate.estimators_[0].alpha, 0.01)
        np.testing.assert_equal(ldml.model_cate.estimators_[0].max_iter, 10)
        np.testing.assert_equal(ldml.model_cate.estimators_[0].tol, 0.01)