Example #1
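    # Checks NonParamDML's refit_final workflow: refitting without cached values
    # raises, swapping model_final/featurizer then calling refit_final() works,
    # and a full re-fit after attribute changes matches a fresh estimator.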
    def test_nonparam_dml(self):
        y, T, X, W = self._get_data()

        dml = NonParamDML(model_y=LinearRegression(),
                          model_t=LinearRegression(),
                          model_final=WeightedLasso(),
                          random_state=123)
        dml.fit(y, T, X=X, W=W)
        with pytest.raises(Exception):
            dml.refit_final()
        dml.fit(y, T, X=X, W=W, cache_values=True)
        dml.model_final = DebiasedLasso(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.model_cate, DebiasedLasso)
        dml.effect_interval(X[:1])
        dml.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        dml.refit_final()
        assert isinstance(dml.featurizer_, PolynomialFeatures)
        dml.effect_interval(X[:1])
        dml.discrete_treatment = True
        dml.featurizer = None
        dml.linear_first_stages = True
        dml.model_t = LogisticRegression()
        dml.model_final = DebiasedLasso()
        dml.fit(y, T, X=X, W=W)
        newdml = NonParamDML(model_y=LinearRegression(),
                             model_t=LogisticRegression(),
                             model_final=DebiasedLasso(),
                             discrete_treatment=True,
                             random_state=123).fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(dml.effect(X[:1]), newdml.effect(X[:1]))
        np.testing.assert_array_equal(dml.effect_interval(X[:1])[0], newdml.effect_interval(X[:1])[0])
Example #2
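 # Helper: fits a DebiasedLasso (or MultiOutputDebiasedLasso for 2D targets) and
 # asserts the fitted coefficients (and intercept, when one is fit) match the
 # expected values within tolerance; returns the fitted coefficients.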
 def _check_debiased_coefs(self,
                           X,
                           y,
                           sample_weight,
                           expected_coefs,
                           expected_intercept=0,
                           params={}):
     debiased_lasso = (MultiOutputDebiasedLasso() if np.ndim(y) > 1
                       else DebiasedLasso())
     debiased_lasso.set_params(**params)
     debiased_lasso.fit(X, y, sample_weight)
     all_params = debiased_lasso.get_params()
     # Check coefficients and intercept are the same within tolerance
     if np.ndim(y) > 1:
         for i in range(y.shape[1]):
             np.testing.assert_allclose(debiased_lasso.coef_[i],
                                        expected_coefs[i],
                                        atol=5e-2)
             if all_params["fit_intercept"]:
                 self.assertAlmostEqual(debiased_lasso.intercept_[i],
                                        expected_intercept[i],
                                        delta=1e-2)
     else:
         np.testing.assert_allclose(debiased_lasso.coef_,
                                    expected_coefs,
                                    atol=5e-2)
         if all_params["fit_intercept"]:
             self.assertAlmostEqual(debiased_lasso.intercept_,
                                    expected_intercept,
                                    delta=1e-2)
     return debiased_lasso.coef_
Example #3
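 # Helper: Monte Carlo check of DebiasedLasso's prediction intervals. Over
 # n_experiments synthetic replications, the 90% intervals (alpha=0.1) at unit
 # test points should cover the true means between 85% and 95% of the time.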
 def _check_debiased_CI(self,
                        X,
                        y,
                        sample_weight,
                        expected_coefs,
                        expected_intercept=0,
                        n_experiments=200,
                        params={}):
     # Unit vectors
     X_test = np.eye(TestLassoExtensions.n_dim)
     y_test_mean = expected_intercept + expected_coefs
     is_in_interval = np.zeros((n_experiments, TestLassoExtensions.n_dim))
     for i in range(n_experiments):
         np.random.seed(i)
         X_exp = np.random.normal(size=X.shape)
         err = np.random.normal(scale=TestLassoExtensions.error_sd,
                                size=X.shape[0])
         y_exp = expected_intercept + np.dot(X_exp, expected_coefs) + err
         debiased_lasso = DebiasedLasso()
         debiased_lasso.set_params(**params)
         debiased_lasso.fit(X_exp, y_exp, sample_weight)
         y_lower, y_upper = debiased_lasso.predict_interval(X_test,
                                                            alpha=0.1)
         is_in_interval[i] = ((y_test_mean >= y_lower) &
                              (y_test_mean <= y_upper))
     CI_coverage = np.mean(is_in_interval, axis=0)
     self.assertTrue(all(CI_coverage >= 0.85))
     self.assertTrue(all(CI_coverage <= 0.95))
Example #4
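 # Verifies MultiOutputDebiasedLasso: parameters propagate from DebiasedLasso,
 # debiased coefficients are recovered with and without an intercept, CIs have
 # the expected coverage, and doubling a sample's weight is equivalent to
 # duplicating that sample.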
 def test_multi_output_debiased_lasso(self):
     """Test MultiOutputDebiasedLasso."""
     # Test that attributes propagate correctly
     est = MultiOutputDebiasedLasso()
     multioutput_attrs = est.get_params()
     debiased_attrs = DebiasedLasso().get_params()
     for attr in debiased_attrs:
         self.assertTrue(attr in multioutput_attrs)
     # Test MultiOutputDebiasedLasso without weights
     # --> Check debiased coefficients without intercept
     params = {'fit_intercept': False}
     self._check_debiased_coefs(TestLassoExtensions.X, TestLassoExtensions.y_2D_consistent,
                                sample_weight=None,
                                expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
                                params=params)
     # --> Check debiased coefficients with intercept
     intercept_2D = np.array([TestLassoExtensions.intercept1, TestLassoExtensions.intercept2])
     self._check_debiased_coefs(TestLassoExtensions.X,
                                TestLassoExtensions.y_2D_consistent + intercept_2D,
                                sample_weight=None,
                                expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
                                expected_intercept=intercept_2D)
     # --> Check CI coverage
     self._check_debiased_CI_2D(TestLassoExtensions.X,
                                TestLassoExtensions.y_2D_consistent + intercept_2D,
                                sample_weight=None,
                                expected_coefs=np.array([TestLassoExtensions.coefs1, TestLassoExtensions.coefs2]),
                                expected_intercept=intercept_2D)
     # Test MultiOutputDebiasedLasso with weights
     # Define weights
     sample_weight = np.concatenate((np.ones(TestLassoExtensions.n_samples // 2),
                                     np.ones(TestLassoExtensions.n_samples // 2) * 2))
     # Define extended datasets
     X_expanded = np.concatenate(
         (TestLassoExtensions.X, TestLassoExtensions.X[TestLassoExtensions.n_samples // 2:]))
     y_expanded = np.concatenate(
         (TestLassoExtensions.y_2D_consistent,
          TestLassoExtensions.y_2D_consistent[TestLassoExtensions.n_samples // 2:]))
     # --> Check debiased coefficients
     weighted_debiased_coefs = self._check_debiased_coefs(
         TestLassoExtensions.X,
         TestLassoExtensions.y_2D_consistent,
         sample_weight=sample_weight,
         expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
         params=params)
     expanded_debiased_coefs = self._check_debiased_coefs(
         X_expanded, y_expanded, sample_weight=None,
         expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
         params=params)
     for i in range(2):
         self.assertTrue(np.allclose(weighted_debiased_coefs[i], expanded_debiased_coefs[i]))
Example #5
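    # Exercises the default inference machinery of DRLearner, DML, and NonParamDML:
    # summary frames, population summaries, and non-None standard errors, plus the
    # AttributeError raised when multitask_model_final=True disables inference.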
    def test_auto_inference(self):
        Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
        est = DRLearner(model_regression=LinearRegression(),
                        model_propensity=LogisticRegression(),
                        model_final=StatsModelsLinearRegression())
        est.fit(Y, T, X=X, W=W)
        est.effect_inference(X).summary_frame()
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        est.marginal_effect_inference(T, X).summary_frame()
        est = DRLearner(model_regression=LinearRegression(),
                        model_propensity=LogisticRegression(),
                        model_final=LinearRegression(),
                        multitask_model_final=True)
        est.fit(Y, T, X=X, W=W)
        with pytest.raises(AttributeError):
            est.effect_inference(X)

        est = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  random_state=123)
        est.fit(Y, T, X=X, W=W)
        est.summary()
        est.coef__inference().summary_frame()
        assert est.coef__inference().stderr is not None
        est.intercept__inference().summary_frame()
        assert est.intercept__inference().stderr is not None
        est.effect_inference(X).summary_frame()
        assert est.effect_inference(X).stderr is not None
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        assert est.const_marginal_effect_inference(X).stderr is not None
        est.marginal_effect_inference(T, X).summary_frame()
        assert est.marginal_effect_inference(T, X).stderr is not None

        est = NonParamDML(model_y=LinearRegression(),
                          model_t=LinearRegression(),
                          model_final=DebiasedLasso(),
                          random_state=123)
        est.fit(Y, T, X=X, W=W)
        est.effect_inference(X).summary_frame()
        assert est.effect_inference(X).stderr is not None
        est.effect_inference(X).population_summary()
        est.const_marginal_effect_inference(X).summary_frame()
        assert est.const_marginal_effect_inference(X).stderr is not None
        est.marginal_effect_inference(T, X).summary_frame()
        assert est.marginal_effect_inference(T, X).stderr is not None
Example #6
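 # Constructor that builds the final-stage DebiasedLasso from the given
 # hyperparameters (alpha, fit_cate_intercept, max_iter, tol) and forwards the
 # nuisance models and remaining settings to the parent DRLearner-style __init__.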
 def __init__(self,
              model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
              model_regression=WeightedLassoCV(cv=3),
              featurizer=None,
              fit_cate_intercept=True,
              alpha='auto',
              max_iter=1000,
              tol=1e-4,
              n_splits=2,
              random_state=None):
     model_final = DebiasedLasso(
         alpha=alpha,
         fit_intercept=fit_cate_intercept,
         max_iter=max_iter,
         tol=tol)
     super().__init__(model_propensity=model_propensity,
                      model_regression=model_regression,
                      model_final=model_final,
                      featurizer=featurizer,
                      multitask_model_final=False,
                      n_splits=n_splits,
                      random_state=random_state)
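
A minimal usage sketch for context (assuming, from the signature alone, that this is the SparseLinearDRLearner constructor in econml; the import path, class name, and synthetic data below are illustrative assumptions, not part of the example):

import numpy as np
from econml.drlearner import SparseLinearDRLearner  # assumed class/path

# Synthetic data: binary treatment T, confounders W, heterogeneity features X.
np.random.seed(123)
n = 500
X = np.random.normal(size=(n, 5))
W = np.random.normal(size=(n, 3))
T = np.random.binomial(1, 0.5, size=n)
y = T * X[:, 0] + W[:, 0] + np.random.normal(scale=0.5, size=n)

est = SparseLinearDRLearner(n_splits=2, random_state=123)
est.fit(y, T, X=X, W=W)
point = est.effect(X[:5])                       # CATE point estimates
lb, ub = est.effect_interval(X[:5], alpha=0.1)  # 90% intervals from the debiased lasso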
Example #7
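    # Fits DML, then mutates the final model, featurizer, intercept setting, and
    # first-stage options, checking refit_final()/fit() behavior; also verifies
    # that LinearDML and SparseLinearDML reject overriding their hardcoded final
    # models while still allowing hyperparameters like alpha to be updated.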
    def test_dml(self):
        """Test setting attributes and refitting"""
        y, T, X, W = self._get_data()

        dml = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  linear_first_stages=False,
                  random_state=123)
        dml.fit(y, T, X=X, W=W)
        with pytest.raises(Exception):
            dml.refit_final()
        dml.fit(y, T, X=X, W=W, cache_values=True)
        dml.model_final = StatsModelsRLM(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.model_cate, StatsModelsRLM)
        np.testing.assert_array_equal(dml.model_cate.coef_[1:].flatten(), dml.coef_.flatten())
        lb, ub = dml.model_cate.coef__interval(alpha=0.01)
        lbt, ubt = dml.coef__interval(alpha=0.01)
        np.testing.assert_array_equal(lb[1:].flatten(), lbt.flatten())
        np.testing.assert_array_equal(ub[1:].flatten(), ubt.flatten())
        intcpt = dml.intercept_
        dml.fit_cate_intercept = False
        np.testing.assert_equal(dml.intercept_, intcpt)
        dml.refit_final()
        np.testing.assert_array_equal(dml.model_cate.coef_.flatten(), dml.coef_.flatten())
        lb, ub = dml.model_cate.coef__interval(alpha=0.01)
        lbt, ubt = dml.coef__interval(alpha=0.01)
        np.testing.assert_array_equal(lb.flatten(), lbt.flatten())
        np.testing.assert_array_equal(ub.flatten(), ubt.flatten())
        with pytest.raises(AttributeError):
            dml.intercept_
        with pytest.raises(AttributeError):
            dml.intercept__interval()
        dml.model_final = DebiasedLasso(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.model_cate, DebiasedLasso)
        dml.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        dml.model_final = StatsModelsLinearRegression(fit_intercept=False)
        dml.refit_final()
        assert isinstance(dml.featurizer_, PolynomialFeatures)
        dml.fit_cate_intercept = True
        dml.refit_final()
        assert isinstance(dml.featurizer_, Pipeline)
        np.testing.assert_array_equal(dml.coef_.shape, (X.shape[1]**2))
        np.testing.assert_array_equal(dml.coef__interval()[0].shape, (X.shape[1]**2))
        coefpre = dml.coef_
        coefpreint = dml.coef__interval()
        dml.fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(coefpre, dml.coef_)
        np.testing.assert_array_equal(coefpreint[0], dml.coef__interval()[0])
        dml.discrete_treatment = True
        dml.featurizer = None
        dml.linear_first_stages = True
        dml.model_t = LogisticRegression()
        dml.fit(y, T, X=X, W=W)
        newdml = DML(model_y=LinearRegression(),
                     model_t=LogisticRegression(),
                     model_final=StatsModelsLinearRegression(fit_intercept=False),
                     discrete_treatment=True,
                     linear_first_stages=True,
                     random_state=123).fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(dml.coef_, newdml.coef_)
        np.testing.assert_array_equal(dml.coef__interval()[0], newdml.coef__interval()[0])

        ldml = LinearDML(model_y=LinearRegression(),
                         model_t=LinearRegression(),
                         linear_first_stages=False)
        ldml.fit(y, T, X=X, W=W, cache_values=True)
        # can set the final model for plain DML, but not for LinearDML (hardcoded to StatsModelsLinearRegression)
        with pytest.raises(ValueError):
            ldml.model_final = StatsModelsRLM()

        ldml = SparseLinearDML(model_y=LinearRegression(),
                               model_t=LinearRegression(),
                               linear_first_stages=False)
        ldml.fit(y, T, X=X, W=W, cache_values=True)
        # likewise, SparseLinearDML's final model can't be overridden (hardcoded to a debiased lasso)
        with pytest.raises(ValueError):
            ldml.model_final = StatsModelsRLM()
        ldml.alpha = 0.01
        ldml.max_iter = 10
        ldml.tol = 0.01
        ldml.refit_final()
        np.testing.assert_equal(ldml.model_cate.estimators_[0].alpha, 0.01)
        np.testing.assert_equal(ldml.model_cate.estimators_[0].max_iter, 10)
        np.testing.assert_equal(ldml.model_cate.estimators_[0].tol, 0.01)