コード例 #1
0
    def test_nonparam_dml(self):
        """Exercise ``refit_final`` on NonParamDML while swapping its components.

        The final model and the featurizer are replaced between refits, and
        finally the estimator is reconfigured for a discrete treatment and
        compared against a freshly constructed estimator with the same seed.
        """
        y, T, X, W = self._get_data()
        first_row = X[:1]

        est = NonParamDML(model_y=LinearRegression(),
                          model_t=LinearRegression(),
                          model_final=WeightedLasso(),
                          random_state=123)

        # Fitting without cache_values: a final-stage refit is expected to raise.
        est.fit(y, T, X=X, W=W)
        with pytest.raises(Exception):
            est.refit_final()

        # Fitting with cached values: the final model can be replaced and refit.
        est.fit(y, T, X=X, W=W, cache_values=True)
        est.model_final = DebiasedLasso(fit_intercept=False)
        est.refit_final()
        assert isinstance(est.model_cate, DebiasedLasso)
        est.effect_interval(first_row)

        # The featurizer can be replaced and refit as well.
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        assert isinstance(est.featurizer_, PolynomialFeatures)
        est.effect_interval(first_row)

        # Reconfigure for a discrete treatment and run a full fit again.
        est.discrete_treatment = True
        est.featurizer = None
        est.linear_first_stages = True
        est.model_t = LogisticRegression()
        est.model_final = DebiasedLasso()
        est.fit(y, T, X=X, W=W)

        # A fresh estimator built with the same seed must give identical results.
        reference = NonParamDML(model_y=LinearRegression(),
                                model_t=LogisticRegression(),
                                model_final=DebiasedLasso(),
                                discrete_treatment=True,
                                random_state=123).fit(y, T, X=X, W=W)
        np.testing.assert_array_equal(est.effect(first_row),
                                      reference.effect(first_row))
        est_lower = est.effect_interval(first_row)[0]
        reference_lower = reference.effect_interval(first_row)[0]
        np.testing.assert_array_equal(est_lower, reference_lower)
コード例 #2
0
ファイル: test_ortho_iv.py プロジェクト: lizhengecon/EconML
    def test_access_to_internal_models(self):
        """
        Check the accessors for the nuisance models, the CATE model and the
        featurizer, once with a polynomial featurizer and once with none.
        """
        from econml.dml import DMLCateEstimator

        Y = np.array([2, 3, 1, 3, 2, 1, 1, 1])
        T = np.array([3, 2, 1, 2, 1, 2, 1, 3])
        X = np.ones((8, 1))

        def fitted_estimator(featurizer):
            # Both scenarios differ only in the featurizer they are given.
            estimator = DMLCateEstimator(model_y=WeightedLasso(),
                                         model_t=LogisticRegression(),
                                         model_final=WeightedLasso(),
                                         featurizer=featurizer,
                                         fit_cate_intercept=True,
                                         discrete_treatment=True)
            estimator.fit(Y, T, X)
            return estimator

        def check_model_types(estimator):
            # The final model and every first-stage model keep the supplied type.
            assert isinstance(estimator.model_cate, WeightedLasso)
            assert all(isinstance(m, WeightedLasso) for m in estimator.models_y)
            assert all(isinstance(m, LogisticRegression) for m in estimator.models_t)

        # Scenario 1: degree-2 polynomial featurizer.
        est = fitted_estimator(PolynomialFeatures(degree=2, include_bias=False))
        assert isinstance(est.original_featurizer, PolynomialFeatures)
        assert isinstance(est.featurizer, Pipeline)
        check_model_types(est)
        np.testing.assert_array_equal(est.cate_feature_names(['A']),
                                      ['A', 'A^2'])
        np.testing.assert_array_equal(est.cate_feature_names(), ['x0', 'x0^2'])

        # Scenario 2: no featurizer supplied.
        est = fitted_estimator(None)
        assert est.original_featurizer is None
        assert isinstance(est.featurizer, FunctionTransformer)
        check_model_types(est)
        np.testing.assert_array_equal(est.cate_feature_names(['A']), ['A'])
コード例 #3
0
ファイル: test_ortho_iv.py プロジェクト: subhamkhemka/EconML
    def test_multidim_arrays_fail(self):
        """Check that fitting with a three-valued treatment or instrument raises AttributeError."""
        Y = np.array([2, 3, 1, 3, 2, 1, 1, 1])
        ternary = np.array([1, 2, 3, 1, 2, 3, 1, 2])
        binary = np.array([1, 2, 1, 1, 2, 1, 1, 2])

        # NonParamDMLIV: three-valued treatment with a binary instrument.
        dmliv = NonParamDMLIV(model_Y_X=Lasso(), model_T_X=LogisticRegression(), model_T_XZ=LogisticRegression(),
                              model_final=WeightedLasso(), discrete_treatment=True)
        with pytest.raises(AttributeError):
            dmliv.fit(Y, T=ternary, Z=binary)

        # IntentToTreatDRIV: a three-valued array on either side must fail.
        driv = IntentToTreatDRIV(model_Y_X=Lasso(), model_T_XZ=LogisticRegression(),
                                 flexible_model_effect=WeightedLasso())
        for treatment, instrument in ((ternary, binary), (binary, ternary)):
            with pytest.raises(AttributeError):
                driv.fit(Y, T=treatment, Z=instrument)
コード例 #4
0
 def test_bad_treatment_nonparam(self):
     """
     Check that the non-parametric DML estimator raises AttributeError for a
     three-valued discrete treatment and for a two-column continuous treatment.
     """
     outcome = np.array([2, 3, 1, 3, 2, 1, 1, 1])
     features = np.ones((8, 1))

     # Case 1: discrete treatment taking three distinct values.
     three_valued = np.array([3, 2, 1, 2, 1, 2, 1, 3])
     discrete_est = NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                             model_t=LogisticRegression(),
                                             model_final=WeightedLasso(),
                                             discrete_treatment=True)
     with pytest.raises(AttributeError):
         discrete_est.fit(outcome, three_valued, features)

     # Case 2: continuous treatment with two columns.
     two_column = np.ones((8, 2))
     continuous_est = NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                               model_t=LinearRegression(),
                                               model_final=WeightedLasso(),
                                               discrete_treatment=False)
     with pytest.raises(AttributeError):
         continuous_est.fit(outcome, two_column, features)
コード例 #5
0
 def _compare_with_lasso(self,
                         lasso_X,
                         lasso_y,
                         wlasso_X,
                         wlasso_y,
                         sample_weight,
                         alpha_range=(0.01,),
                         params=None):
     """Assert that WeightedLasso reproduces Lasso for every alpha.

     For each ``alpha`` in ``alpha_range`` a ``Lasso`` is fit on
     ``(lasso_X, lasso_y)`` and a ``WeightedLasso`` is fit on
     ``(wlasso_X, wlasso_y)`` using ``sample_weight``. The fitted
     coefficients and intercepts of the two models must then agree.

     Parameters
     ----------
     lasso_X, lasso_y :
         Training data passed to ``Lasso.fit``.
     wlasso_X, wlasso_y :
         Training data passed to ``WeightedLasso.fit``.
     sample_weight :
         Forwarded to ``WeightedLasso.fit`` as ``sample_weight``.
     alpha_range : iterable, optional
         Values used as ``alpha`` for both models. Defaults to ``(0.01,)``.
     params : dict, optional
         Extra parameters applied to both models through ``set_params``.
         ``None`` (the default) applies no extra parameters.

     Raises
     ------
     AssertionError
         If coefficients or intercepts of the two models differ.
     """
     # Defaults are immutable / None so that no object is shared between calls.
     extra_params = {} if params is None else params
     multi_output = np.ndim(lasso_y) > 1
     for alpha in alpha_range:
         lasso = Lasso(alpha=alpha)
         lasso.set_params(**extra_params)
         lasso.fit(lasso_X, lasso_y)
         wlasso = WeightedLasso(alpha=alpha)
         wlasso.set_params(**extra_params)
         wlasso.fit(wlasso_X, wlasso_y, sample_weight=sample_weight)
         # Comparisons rely on the default tolerances of
         # np.testing.assert_allclose and unittest's assertAlmostEqual.
         if multi_output:
             # The flag cannot change inside the loop, so look it up once.
             check_intercept = lasso.get_params()["fit_intercept"]
             # np.shape also accepts nested sequences, not only ndarrays.
             for i in range(np.shape(lasso_y)[1]):
                 np.testing.assert_allclose(lasso.coef_[i], wlasso.coef_[i])
                 if check_intercept:
                     self.assertAlmostEqual(lasso.intercept_[i],
                                            wlasso.intercept_[i])
         else:
             np.testing.assert_allclose(lasso.coef_, wlasso.coef_)
             self.assertAlmostEqual(lasso.intercept_, wlasso.intercept_)
コード例 #6
0
 def test_orf(self):
     """Fit both orthogonal forests on DataFrame inputs and query their effects."""
     frame = TestPandasIntegration.df
     # Single outcome only, ORF does not support multiple outcomes
     X = frame[TestPandasIntegration.features]
     W = frame[TestPandasIntegration.controls]
     Y = frame[TestPandasIntegration.outcome]

     def exercise(forest, treatment):
         # Fit with 'blb' inference, then query effects, intervals and the
         # population summary of the fitted forest.
         forest.fit(Y, treatment, X=X, W=W, inference='blb')
         forest.effect(X)
         _lower, _upper = forest.effect_interval(X, alpha=0.05)
         self._check_popsum_names(forest.effect_inference(X).population_summary())

     # Test DMLOrthoForest
     continuous_T = frame[TestPandasIntegration.cont_treat]
     dml_forest = DMLOrthoForest(n_trees=100,
                                 max_depth=2,
                                 model_T=WeightedLasso(),
                                 model_Y=WeightedLasso())
     exercise(dml_forest, continuous_T)

     # Test DROrthoForest
     dr_forest = DROrthoForest(n_trees=100, max_depth=2)
     binary_T = frame[TestPandasIntegration.bin_treat]
     exercise(dr_forest, binary_T)
コード例 #7
0
ファイル: test_ortho_iv.py プロジェクト: wenjunsun/EconML
    def test_access_to_internal_models(self):
        """
        Check the accessors for the nuisance models, the final model and the
        featurizer, once with a polynomial featurizer and once with none.
        """
        Y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        T = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        Z = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
        X = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)

        def fitted_estimator(featurizer):
            # Both scenarios differ only in the featurizer they are given.
            estimator = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                                model_T_XZ=LogisticRegression(C=1000),
                                                flexible_model_effect=WeightedLasso(),
                                                featurizer=featurizer)
            estimator.fit(Y, T, Z=Z, X=X)
            return estimator

        def check_model_types(estimator):
            # The final model and every first-stage model have the expected type.
            assert isinstance(estimator.model_final_, StatsModelsLinearRegression)
            assert all(isinstance(m, LinearRegression) for m in estimator.models_Y_X)
            assert all(isinstance(m, LogisticRegression) for m in estimator.models_T_XZ)

        # Scenario 1: degree-2 polynomial featurizer.
        est = fitted_estimator(PolynomialFeatures(degree=2, include_bias=False))
        assert isinstance(est.original_featurizer, PolynomialFeatures)
        assert isinstance(est.featurizer_, Pipeline)
        check_model_types(est)
        np.testing.assert_array_equal(est.cate_feature_names(['A']),
                                      ['A', 'A^2'])
        np.testing.assert_array_equal(est.cate_feature_names(), ['x0', 'x0^2'])

        # Scenario 2: no featurizer supplied.
        est = fitted_estimator(None)
        assert est.original_featurizer is None
        assert isinstance(est.featurizer_, FunctionTransformer)
        check_model_types(est)
        np.testing.assert_array_equal(est.cate_feature_names(['A']), ['A'])
コード例 #8
0
def monte_carlo_lasso(
        first_stage=lambda: WeightedLasso(alpha=0.01, fit_intercept=True,
                                          tol=1e-6, random_state=123),
        folder='lasso'):
    """Call ``run_all_mc`` with a fixed grid of experiment settings.

    Parameters
    ----------
    first_stage : callable, optional
        Zero-argument factory forwarded to ``run_all_mc``; by default it
        builds a ``WeightedLasso`` with fixed hyper-parameters.
    folder : str, optional
        Forwarded to ``run_all_mc`` as its second argument.
    """
    # Entries are listed in the positional order run_all_mc is called with.
    grid = (
        [500],            # n_list
        1000,             # n_exp
        [1],              # hetero_coef_list
        [20],             # d_list
        [5],              # d_x_list
        [1, 2],           # p_list
        [1, 3],           # t_list
        ['HC1'],          # cov_type_list
        [.01, .05, .2],   # alpha_list
    )
    run_all_mc(first_stage, folder, *grid)
コード例 #9
0
ファイル: test_ortho_iv.py プロジェクト: subhamkhemka/EconML
    def test_can_use_statsmodel_inference(self):
        """Check that intervals from the default inference bracket their point estimates."""

        def check_interval(interval, point, same_shape=True):
            # An interval is a (lower, upper) pair that brackets the point
            # estimate and is not degenerate everywhere.
            assert len(interval) == 2
            lo, hi = interval
            if same_shape:
                assert lo.shape == hi.shape == point.shape
            assert np.all(lo <= point)
            assert np.all(point <= hi)
            assert np.any(lo < hi)  # for at least some of the examples, the CI should have nonzero width

        Y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        T = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        Z = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
        X = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)

        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                      model_T_XZ=LogisticRegression(C=1000),
                                      flexible_model_effect=WeightedLasso())
        est.fit(Y, T, Z=Z, X=X)

        X_test = np.ones((9, 1))
        T0 = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1])
        T1 = np.array([1, 2, 1, 1, 2, 2, 2, 2, 1])

        # Effect between explicit treatment pairs.
        check_interval(est.effect_interval(X_test, T0=T0, T1=T1, alpha=0.05),
                       est.effect(X_test, T0=T0, T1=T1))

        # Constant marginal effect.
        check_interval(est.const_marginal_effect_interval(X_test, alpha=0.05),
                       est.const_marginal_effect(X_test))

        # Coefficients of the final model.
        check_interval(est.coef__interval(alpha=0.05), est.coef_)

        # Intercept of the final model; shapes are not compared here.
        check_interval(est.intercept__interval(alpha=0.05), est.intercept_,
                       same_shape=False)
コード例 #10
0
ファイル: test_ortho_iv.py プロジェクト: wenjunsun/EconML
    def test_cate_api(self):
        """Test that we correctly implement the CATE API.

        Iterates over combinations of treatment, outcome, feature and
        instrument dimensions (and discrete/continuous variants), fits each
        applicable IV estimator, and checks the shapes returned by the effect,
        marginal-effect and interval methods. Estimators are also pickled
        before and after fitting.

        NOTE(review): data are drawn from ``np.random`` and no seed is set
        inside this method.
        """
        n = 30

        # Array shape for n rows; a negative d denotes a 1-D vector.
        def size(n, d):
            return (n, d) if d >= 0 else (n, )

        # Draw random data of the requested dimension, or None when d is None.
        # Discrete data are redrawn until every combination of 'a'/'b'/'c'
        # across the columns occurs more than once.
        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = size(n, d)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every row
                    _, counts = np.unique(arr, return_counts=True, axis=0)
                    if len(counts) == 3**(d if d > 0 else
                                          1) and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        # Expected shape of effect(): (n,) plus an outcome axis when d_y > 0.
        def eff_shape(n, d_y):
            return (n, ) + ((d_y, ) if d_y > 0 else ())

        # Expected shape of marginal_effect(): effect shape plus a treatment
        # axis when d_t_final > 0.
        def marg_eff_shape(n, d_y, d_t_final):
            return ((n, ) + ((d_y, ) if d_y > 0 else
                             ()) + ((d_t_final, ) if d_t_final > 0 else ()))

        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
        def const_marg_eff_shape(n, d_x, d_y, d_t_final):
            return ((n if d_x else 1, ) + ((d_y, ) if d_y > 0 else ()) +
                    ((d_t_final, ) if d_t_final > 0 else ()))

        for d_t in [2, 1, -1]:
            # Number of treatment columns (a 1-D treatment counts as one).
            n_t = d_t if d_t > 0 else 1
            # Discrete treatments are only tried for a single treatment column.
            for discrete_t in [True, False] if n_t == 1 else [False]:
                for d_y in [3, 1, -1]:
                    for d_q in [2, None]:
                        for d_z in [2, 1]:
                            # Skip cases with fewer instrument columns than treatments.
                            if d_z < n_t:
                                continue
                            for discrete_z in [True, False
                                               ] if d_z == 1 else [False]:
                                Z1, Q, Y, T1 = [
                                    make_random(is_discrete, d)
                                    for is_discrete, d in [(
                                        discrete_z,
                                        d_z), (False,
                                               d_q), (False,
                                                      d_y), (discrete_t, d_t)]
                                ]
                                if discrete_t and discrete_z:
                                    # need to make sure we get all *joint* combinations
                                    # NOTE(review): both Z1 and T1 are taken from column 0 of
                                    # arr, so they hold identical values and column 1 is unused;
                                    # possibly T1 was meant to use arr[:, 1] - confirm.
                                    arr = make_random(True, 2)
                                    Z1 = arr[:, 0].reshape(size(n, d_z))
                                    T1 = arr[:, 0].reshape(size(n, d_t))

                                # Expected treatment axis length for the three-category T1
                                # (presumably one column per non-baseline category - confirm).
                                d_t_final1 = 2 if discrete_t else d_t

                                if discrete_t:
                                    # IntentToTreat only supports binary treatments/instruments
                                    T2 = T1.copy()
                                    T2[T1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(T1 == 'c'))
                                    d_t_final2 = 1
                                if discrete_z:
                                    # IntentToTreat only supports binary treatments/instruments
                                    Z2 = Z1.copy()
                                    Z2[Z1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(Z1 == 'c'))

                                effect_shape = eff_shape(n, d_y)

                                # Classifier for discrete variables, Lasso otherwise.
                                model_t = LogisticRegression(
                                ) if discrete_t else Lasso()
                                model_z = LogisticRegression(
                                ) if discrete_z else Lasso()

                                all_infs = [None, BootstrapInference(1)]

                                # Each entry is (estimator, multi flag, inference options);
                                # the multi flag is consulted in the skip condition below.
                                estimators = [
                                    (DMLATEIV(model_Y_W=Lasso(),
                                              model_T_W=model_t,
                                              model_Z_W=model_z,
                                              discrete_treatment=discrete_t,
                                              discrete_instrument=discrete_z),
                                     True, all_infs),
                                    (ProjectedDMLATEIV(
                                        model_Y_W=Lasso(),
                                        model_T_W=model_t,
                                        model_T_WZ=model_t,
                                        discrete_treatment=discrete_t,
                                        discrete_instrument=discrete_z), False,
                                     all_infs),
                                    (DMLIV(model_Y_X=Lasso(),
                                           model_T_X=model_t,
                                           model_T_XZ=model_t,
                                           model_final=Lasso(),
                                           discrete_treatment=discrete_t,
                                           discrete_instrument=discrete_z),
                                     False, all_infs)
                                ]

                                if d_q and discrete_t and discrete_z:
                                    # IntentToTreat requires X
                                    estimators.append((LinearIntentToTreatDRIV(
                                        model_Y_X=Lasso(),
                                        model_T_XZ=model_t,
                                        flexible_model_effect=WeightedLasso(),
                                        cv=2), False, all_infs + ['auto']))

                                for est, multi, infs in estimators:
                                    # NOTE(review): by operator precedence this reads
                                    # ((not multi) and d_y > 1) or d_t > 1 or d_z > 1, so every
                                    # case with d_t > 1 or d_z > 1 is skipped regardless of
                                    # `multi` - confirm this is intended.
                                    if not (
                                            multi
                                    ) and d_y > 1 or d_t > 1 or d_z > 1:
                                        continue

                                    # ensure we can serialize unfit estimator
                                    pickle.dumps(est)

                                    # Only LinearIntentToTreatDRIV is additionally run with
                                    # a two-column W.
                                    d_ws = [None]
                                    if isinstance(est,
                                                  LinearIntentToTreatDRIV):
                                        d_ws.append(2)

                                    for d_w in d_ws:
                                        W = make_random(False, d_w)

                                        for inf in infs:
                                            with self.subTest(
                                                    d_z=d_z,
                                                    d_x=d_q,
                                                    d_y=d_y,
                                                    d_t=d_t,
                                                    discrete_t=discrete_t,
                                                    discrete_z=discrete_z,
                                                    est=est,
                                                    inf=inf):
                                                # Start from the general data; the branches
                                                # below substitute estimator-specific inputs.
                                                Z = Z1
                                                T = T1
                                                d_t_final = d_t_final1
                                                X = Q
                                                d_x = d_q

                                                if isinstance(
                                                        est,
                                                    (DMLATEIV,
                                                     ProjectedDMLATEIV)):
                                                    # these support only W but not X
                                                    W = Q
                                                    X = None
                                                    d_x = None

                                                    def fit():
                                                        return est.fit(
                                                            Y,
                                                            T,
                                                            Z=Z,
                                                            W=W,
                                                            inference=inf)

                                                    def score():
                                                        return est.score(Y,
                                                                         T,
                                                                         Z=Z,
                                                                         W=W)
                                                else:
                                                    # these support only binary, not general discrete T and Z
                                                    if discrete_t:
                                                        T = T2
                                                        d_t_final = d_t_final2

                                                    if discrete_z:
                                                        Z = Z2

                                                    if isinstance(
                                                            est,
                                                            LinearIntentToTreatDRIV
                                                    ):

                                                        def fit():
                                                            return est.fit(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                W=W,
                                                                inference=inf)

                                                        def score():
                                                            return est.score(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                W=W)
                                                    else:

                                                        def fit():
                                                            return est.fit(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                inference=inf)

                                                        def score():
                                                            return est.score(
                                                                Y, T, Z=Z, X=X)

                                                marginal_effect_shape = marg_eff_shape(
                                                    n, d_y, d_t_final)
                                                const_marginal_effect_shape = const_marg_eff_shape(
                                                    n, d_x, d_y, d_t_final)

                                                fit()

                                                # ensure we can serialize fit estimator
                                                pickle.dumps(est)

                                                # make sure we can call the marginal_effect and effect methods
                                                const_marg_eff = est.const_marginal_effect(
                                                    X)
                                                marg_eff = est.marginal_effect(
                                                    T, X)
                                                self.assertEqual(
                                                    shape(marg_eff),
                                                    marginal_effect_shape)
                                                self.assertEqual(
                                                    shape(const_marg_eff),
                                                    const_marginal_effect_shape
                                                )

                                                # Without X the constant marginal effect has a single
                                                # row, so only the first marginal-effect row is compared.
                                                np.testing.assert_array_equal(
                                                    marg_eff
                                                    if d_x else marg_eff[0:1],
                                                    const_marg_eff)

                                                # Baseline treatment: category 'a' when discrete,
                                                # zeros otherwise.
                                                T0 = np.full_like(
                                                    T, 'a'
                                                ) if discrete_t else np.zeros_like(
                                                    T)
                                                eff = est.effect(X,
                                                                 T0=T0,
                                                                 T1=T)
                                                self.assertEqual(
                                                    shape(eff), effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                # Interval methods are only exercised when an
                                                # inference option was passed to fit; each result
                                                # gains a leading axis of length 2.
                                                if inf is not None:
                                                    const_marg_eff_int = est.const_marginal_effect_interval(
                                                        X)
                                                    marg_eff_int = est.marginal_effect_interval(
                                                        T, X)
                                                    self.assertEqual(
                                                        shape(marg_eff_int),
                                                        (2, ) +
                                                        marginal_effect_shape)
                                                    self.assertEqual(
                                                        shape(
                                                            const_marg_eff_int
                                                        ), (2, ) +
                                                        const_marginal_effect_shape
                                                    )
                                                    self.assertEqual(
                                                        shape(
                                                            est.
                                                            effect_interval(
                                                                X, T0=T0,
                                                                T1=T)),
                                                        (2, ) + effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                score()

                                                # make sure we can call effect with implied scalar treatments,
                                                # no matter the dimensions of T, and also that we warn when there
                                                # are multiple treatments
                                                if d_t > 1:
                                                    cm = self.assertWarns(
                                                        Warning)
                                                else:
                                                    # ExitStack can be used as a "do nothing" ContextManager
                                                    cm = ExitStack()
                                                with cm:
                                                    effect_shape2 = (
                                                        n if d_x else 1, ) + (
                                                            (d_y, )
                                                            if d_y > 0 else ())
                                                    eff = est.effect(
                                                        X
                                                    ) if not discrete_t else est.effect(
                                                        X, T0='a', T1='b')
                                                    self.assertEqual(
                                                        shape(eff),
                                                        effect_shape2)
コード例 #11
0
    def test_orthoiv(self):
        """Check that IV estimators honor settings changed after the first fit.

        Each estimator is fit with ``cache_values=True`` and then has its
        nuisance models, final model, featurizer or intercept flag reassigned.
        The assertions verify that ``refit_final()`` and a subsequent ``fit()``
        use the reassigned values.
        """
        y, T, X, W = self._get_data()
        # The instrument is simply a copy of the treatment.
        Z = T.copy()

        # DMLATEIV: nuisance models reassigned after a fit must be the ones
        # trained by the next full fit.
        est = DMLATEIV(model_Y_W=LinearRegression(),
                       model_T_W=LinearRegression(),
                       model_Z_W=LinearRegression(),
                       mc_iters=2)
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        est.refit_final()
        est.model_Y_W = Lasso()
        est.model_T_W = ElasticNet()
        est.model_Z_W = WeightedLasso()
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0]._model_Y_W._model, Lasso)
        assert isinstance(est.models_nuisance_[0]._model_T_W._model, ElasticNet)
        assert isinstance(est.models_nuisance_[0]._model_Z_W._model, WeightedLasso)

        # ProjectedDMLATEIV: same check, with model_T_WZ in place of model_Z_W.
        est = ProjectedDMLATEIV(model_Y_W=LinearRegression(),
                                model_T_W=LinearRegression(),
                                model_T_WZ=LinearRegression(),
                                mc_iters=2)
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        est.refit_final()
        est.model_Y_W = Lasso()
        est.model_T_W = ElasticNet()
        est.model_T_WZ = WeightedLasso()
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0]._model_Y_W._model, Lasso)
        assert isinstance(est.models_nuisance_[0]._model_T_W._model, ElasticNet)
        assert isinstance(est.models_nuisance_[0]._model_T_WZ._model, WeightedLasso)

        # DMLIV: featurizer and intercept changes take effect on refit_final(),
        # nuisance model changes on the next fit().
        est = DMLIV(model_Y_X=LinearRegression(),
                    model_T_X=LinearRegression(),
                    model_T_XZ=LinearRegression(),
                    model_final=LinearRegression(fit_intercept=False),
                    mc_iters=2)
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        np.testing.assert_equal(len(est.coef_), X.shape[1])
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        # NOTE(review): this equality depends on the number of columns of X
        # returned by _get_data -- confirm against the fixture.
        np.testing.assert_equal(len(est.coef_), X.shape[1]**2)
        # Bare attribute accesses: they only need to succeed without raising.
        est.intercept_
        est.fit_cate_intercept = False
        # The flag has been changed but the final model has not been refit yet,
        # so the intercept must still be available here.
        est.intercept_
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        est.model_Y_X = Lasso()
        est.model_T_X = ElasticNet()
        est.model_T_XZ = WeightedLasso()
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        # Both the accessor properties and the underlying nuisance objects
        # must expose the reassigned models.
        assert isinstance(est.models_Y_X[0], Lasso)
        assert isinstance(est.models_T_X[0], ElasticNet)
        assert isinstance(est.models_T_XZ[0], WeightedLasso)
        assert isinstance(est.models_nuisance_[0]._model_Y_X._model, Lasso)
        assert isinstance(est.models_nuisance_[0]._model_T_X._model, ElasticNet)
        assert isinstance(est.models_nuisance_[0]._model_T_XZ._model, WeightedLasso)

        # NonParamDMLIV: a new featurizer and final model are picked up by
        # refit_final() alone.
        est = NonParamDMLIV(model_Y_X=LinearRegression(),
                            model_T_X=LinearRegression(),
                            model_T_XZ=LinearRegression(),
                            model_final=LinearRegression(fit_intercept=True),
                            mc_iters=2)
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.model_final = WeightedLasso()
        est.refit_final()
        assert isinstance(est.model_cate, WeightedLasso)
        assert isinstance(est.featurizer_, PolynomialFeatures)

        # IntentToTreatDRIV: with model_final left unset, the fitted final
        # model is an instance of the flexible_model_effect class.
        est = IntentToTreatDRIV(model_Y_X=LinearRegression(), model_T_XZ=LogisticRegression(),
                                flexible_model_effect=LinearRegression())
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert est.model_final is None
        assert isinstance(est.model_final_, LinearRegression)
        est.flexible_model_effect = Lasso()
        est.refit_final()
        assert est.model_final is None
        assert isinstance(est.model_final_, Lasso)
        est.model_final = Lasso()
        est.refit_final()
        assert isinstance(est.model_final, Lasso)
        assert isinstance(est.model_final_, Lasso)
        # refit_final() alone leaves the preliminary effect model inside the
        # nuisance object unchanged; only a full fit() replaces it.
        assert isinstance(est.models_nuisance_[0]._prel_model_effect.model_final_, LinearRegression)
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0]._prel_model_effect.model_final_, Lasso)

        # LinearIntentToTreatDRIV: the intercept disappears only after the
        # refit, and assigning model_final is rejected with ValueError.
        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(), model_T_XZ=LogisticRegression(),
                                      flexible_model_effect=LinearRegression())
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        est.fit_cate_intercept = False
        est.intercept_
        est.intercept__interval()
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        with pytest.raises(AttributeError):
            est.intercept__interval()
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.flexible_model_effect = Lasso()
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0]._prel_model_effect.model_final_, Lasso)
コード例 #12
0
    model_Y=sklearn.linear_model.LinearRegression())
est.fit(Y, T, W, W)
print(est.effect(W[:2]))

# advanced example with many confounders

# Features: 4000 samples of a single covariate drawn uniformly between -1 and 1.
X = np.random.uniform(-1, 1, size=(4000, 1))
# Controls: a 4000 x 50 matrix of standard-normal draws.
W = np.random.normal(size=(4000, 50))
# Four distinct control columns (sampled without replacement) that enter both T and Y.
support = np.random.choice(50, 4, replace=False)
# Treatment: a random linear combination of the support columns plus standard-normal noise.
T = np.dot(W[:, support],
           np.random.normal(size=4)) + np.random.normal(size=4000)
# Outcome: T is multiplied by exp(2 * x), then a second random linear combination of the
# support columns and the constant 0.5 are added.
Y = np.exp(2 * X[:, 0]) * T + np.dot(W[:, support],
                                     np.random.normal(size=4)) + .5
# Forest of 100 trees with depth at most 5; both first-stage models are WeightedLasso(alpha=0.01).
est = ContinuousTreatmentOrthoForest(n_trees=100,
                                     max_depth=5,
                                     model_Y=WeightedLasso(alpha=0.01),
                                     model_T=WeightedLasso(alpha=0.01))
est.fit(Y, T, X, W)

# Evaluate the estimate on 30 evenly spaced points in [-1, 1] and plot it against
# exp(2 * x), the multiplier on T used to generate Y above.
X_test = np.linspace(-1, 1, 30).reshape(-1, 1)
treatment_effects = est.effect(X_test)
plt.plot(X_test[:, 0], treatment_effects, label='ORF estimate')
plt.plot(X_test[:, 0], np.exp(2 * X_test[:, 0]), 'b--', label='True effect')
plt.legend()
# Non-blocking show so the script keeps running past the plot.
plt.show(block=False)

########## get my data in the framework #######################################

# generate controls/confounders  ## W ##
region_fe = pd.get_dummies(dta['ags'])
time_fe = pd.get_dummies(data=dta[['dow_num', 'month', 'year']],
コード例 #13
0
ファイル: test_statsmodels.py プロジェクト: tommylegit/EconML
 def first_stage_model():
     """Return a new WeightedLasso configured for use as a first-stage model.

     The settings are fixed: alpha=0.01, an intercept is fit, the tolerance
     is 1e-12 and the random state is 123.
     """
     settings = dict(alpha=0.01, fit_intercept=True, tol=1e-12, random_state=123)
     return WeightedLasso(**settings)
コード例 #14
0
ファイル: test_refit.py プロジェクト: microsoft/EconML
    def test_orthoiv(self):
        """Verify that IV estimators pick up attributes reassigned after a fit.

        Every estimator is fit with ``cache_values=True``, has some of its
        models or settings replaced, and is then refit (``refit_final()``)
        or fully re-fit (``fit()``); the assertions inspect which objects
        ended up being used.
        """
        y, T, X, W = self._get_data()
        # The instrument is a copy of the treatment.
        Z = T.copy()
        # Keyword arguments shared by the fit calls below.
        fit_w = dict(Z=Z, W=W, cache_values=True)
        fit_xw = dict(Z=Z, X=X, W=W, cache_values=True)

        # OrthoIV: reassigned nuisance models are used by the next fit().
        est = OrthoIV(
            model_y_xw=LinearRegression(),
            model_t_xw=LinearRegression(),
            model_z_xw=LinearRegression(),
            mc_iters=2,
        )
        est.fit(y, T, **fit_w)
        est.refit_final()
        est.model_y_xw = Lasso()
        est.model_t_xw = ElasticNet()
        est.model_z_xw = WeightedLasso()
        est.fit(y, T, **fit_w)
        nuisance = est.models_nuisance_[0][0]
        assert isinstance(nuisance._model_y_xw._model, Lasso)
        assert isinstance(nuisance._model_t_xw._model, ElasticNet)
        assert isinstance(nuisance._model_z_xw._model, WeightedLasso)

        # DMLIV: same check, with model_t_xwz in place of model_z_xw.
        est = DMLIV(
            model_y_xw=LinearRegression(),
            model_t_xw=LinearRegression(),
            model_t_xwz=LinearRegression(),
            model_final=LinearRegression(fit_intercept=False),
            mc_iters=2,
        )
        est.fit(y, T, **fit_xw)
        est.model_y_xw = Lasso()
        est.model_t_xw = ElasticNet()
        est.model_t_xwz = WeightedLasso()
        est.fit(y, T, **fit_xw)
        nuisance = est.models_nuisance_[0][0]
        assert isinstance(nuisance._model_y_xw._model, Lasso)
        assert isinstance(nuisance._model_t_xw._model, ElasticNet)
        assert isinstance(nuisance._model_t_xwz._model, WeightedLasso)

        # NonParamDMLIV: featurizer and final model are swapped in by
        # refit_final() without a new full fit.
        est = NonParamDMLIV(
            model_y_xw=LinearRegression(),
            model_t_xw=LinearRegression(),
            model_t_xwz=LinearRegression(),
            model_final=LinearRegression(fit_intercept=True),
            mc_iters=2,
        )
        est.fit(y, T, **fit_xw)
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.model_final = WeightedLasso()
        est.refit_final()
        assert isinstance(est.model_cate, WeightedLasso)
        assert isinstance(est.featurizer_, PolynomialFeatures)

        # IntentToTreatDRIV: while model_final is unset, the fitted final
        # model is an instance of the flexible_model_effect class.
        est = IntentToTreatDRIV(
            model_y_xw=LinearRegression(),
            model_t_xwz=LogisticRegression(),
            flexible_model_effect=LinearRegression(),
        )
        est.fit(y, T, **fit_xw)
        assert est.model_final is None
        assert isinstance(est.model_final_, LinearRegression)
        est.flexible_model_effect = Lasso()
        est.refit_final()
        assert est.model_final is None
        assert isinstance(est.model_final_, Lasso)
        est.model_final = Lasso()
        est.refit_final()
        assert isinstance(est.model_final, Lasso)
        assert isinstance(est.model_final_, Lasso)
        # The preliminary effect model stored with the nuisances is still the
        # original one after refit_final(); it changes only after a full fit().
        prel_effect = est.models_nuisance_[0][0]._prel_model_effect
        assert isinstance(prel_effect.model_final_, LinearRegression)
        est.fit(y, T, **fit_xw)
        prel_effect = est.models_nuisance_[0][0]._prel_model_effect
        assert isinstance(prel_effect.model_final_, Lasso)

        # LinearIntentToTreatDRIV: the intercept stays readable until the
        # refit, and assigning model_final raises ValueError.
        est = LinearIntentToTreatDRIV(
            model_y_xw=LinearRegression(),
            model_t_xwz=LogisticRegression(),
            flexible_model_effect=LinearRegression(),
        )
        est.fit(y, T, **fit_xw)
        est.fit_cate_intercept = False
        # Bare accesses: these only have to succeed before the refit.
        est.intercept_
        est.intercept__interval()
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        with pytest.raises(AttributeError):
            est.intercept__interval()
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.flexible_model_effect = Lasso()
        est.fit(y, T, **fit_xw)
        prel_effect = est.models_nuisance_[0][0]._prel_model_effect
        assert isinstance(prel_effect.model_final_, Lasso)
コード例 #15
0
    def test_cate_api(self):
        """Check output shapes of the CATE API for several DML estimators.

        Random data is generated for every combination of treatment, outcome,
        feature and control dimensionality (continuous and discrete
        treatments), and each estimator is fit under each inference option.
        The shapes of effects, marginal effects, coefficients, intercepts and
        their intervals are compared with the shapes implied by the inputs.
        """
        n_rows = 20

        def make_random(is_discrete, d):
            # d=None stands for "argument not supplied"; a negative d asks
            # for a 1-D vector rather than a 2-D array.
            if d is None:
                return None
            dims = (n_rows, d) if d >= 0 else (n_rows, )
            if not is_discrete:
                return np.random.normal(size=dims)
            while True:
                draw = np.random.choice(['a', 'b', 'c'], size=dims)
                # redraw until all three labels occur at least twice
                _, tallies = np.unique(draw, return_counts=True)
                if len(tallies) == 3 and tallies.min() > 1:
                    return draw

        for d_t in [2, 1, -1]:
            # discrete treatments are only exercised for single treatments
            discrete_options = [True, False] if d_t <= 1 else [False]
            for is_discrete in discrete_options:
                for d_y in [3, 1, -1]:
                    y_dims = (d_y, ) if d_y > 0 else ()
                    for d_x in [2, None]:
                        for d_w in [2, None]:
                            W = make_random(False, d_w)
                            X = make_random(False, d_x)
                            Y = make_random(False, d_y)
                            T = make_random(is_discrete, d_t)

                            featurizer_cases = [
                                (None, True),
                                (PolynomialFeatures(degree=2, include_bias=False), True),
                                (PolynomialFeatures(degree=2, include_bias=True), False),
                            ]
                            for featurizer, fit_cate_intercept in featurizer_cases:

                                d_t_final = 2 if is_discrete else d_t
                                t_dims = (d_t_final, ) if d_t_final > 0 else ()
                                # since T isn't passed to const_marginal_effect,
                                # it defaults to one row if X is None
                                rows_without_t = n_rows if d_x else 1

                                effect_shape = (n_rows, ) + y_dims
                                marginal_effect_shape = (n_rows, ) + y_dims + t_dims
                                const_marginal_effect_shape = (rows_without_t, ) + y_dims + t_dims
                                effect_shape2 = (rows_without_t, ) + y_dims

                                if featurizer and d_x:
                                    fd_x = featurizer.fit_transform(X).shape[1:]
                                elif d_x:
                                    fd_x = (d_x, )
                                else:
                                    fd_x = (0, )
                                treat_dims = (2, ) if is_discrete else T.shape[1:]
                                coef_shape = Y.shape[1:] + treat_dims + fd_x
                                intercept_shape = Y.shape[1:] + treat_dims

                                if is_discrete:
                                    model_t = LogisticRegression()
                                else:
                                    model_t = Lasso()

                                # TODO: add stratification to bootstrap so that we can use it
                                # even with discrete treatments
                                all_infs = [None, 'statsmodels']
                                if not is_discrete:
                                    all_infs.append(BootstrapInference(1))

                                def check_estimator(est, inf):
                                    """Fit ``est`` with ``inf`` and compare all output shapes."""
                                    if X is None and not fit_cate_intercept:
                                        with pytest.raises(AttributeError):
                                            est.fit(Y, T, X, W, inference=inf)
                                        return

                                    est.fit(Y, T, X, W, inference=inf)
                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(
                                        marg_eff if d_x else marg_eff[0:1],
                                        const_marg_eff)

                                    if is_discrete:
                                        T0 = np.full_like(T, 'a')
                                    else:
                                        T0 = np.zeros_like(T)
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)

                                    linear_family = isinstance(
                                        est,
                                        (LinearDMLCateEstimator, SparseLinearDMLCateEstimator))

                                    if linear_family:
                                        self.assertEqual(shape(est.coef_), coef_shape)
                                        if fit_cate_intercept:
                                            self.assertEqual(shape(est.intercept_), intercept_shape)
                                        else:
                                            with pytest.raises(AttributeError):
                                                self.assertEqual(shape(est.intercept_), intercept_shape)

                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2, ) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2, ) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2, ) + effect_shape)
                                        if linear_family:
                                            self.assertEqual(shape(est.coef__interval()),
                                                             (2, ) + coef_shape)
                                            if fit_cate_intercept:
                                                self.assertEqual(shape(est.intercept__interval()),
                                                                 (2, ) + intercept_shape)
                                            else:
                                                with pytest.raises(AttributeError):
                                                    self.assertEqual(shape(est.intercept__interval()),
                                                                     (2, ) + intercept_shape)

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments,
                                    # no matter the dimensions of T, and also that we warn when there
                                    # are multiple treatments
                                    # (ExitStack can be used as a "do nothing" ContextManager)
                                    cm = self.assertWarns(Warning) if d_t > 1 else ExitStack()
                                    with cm:
                                        if is_discrete:
                                            eff = est.effect(X, T0='a', T1='b')
                                        else:
                                            eff = est.effect(X)
                                        self.assertEqual(shape(eff), effect_shape2)

                                linear_est = LinearDMLCateEstimator(
                                    model_y=Lasso(),
                                    model_t='auto',
                                    featurizer=featurizer,
                                    fit_cate_intercept=fit_cate_intercept,
                                    discrete_treatment=is_discrete)
                                sparse_est = SparseLinearDMLCateEstimator(
                                    model_y=WeightedLasso(),
                                    model_t=model_t,
                                    featurizer=featurizer,
                                    fit_cate_intercept=fit_cate_intercept,
                                    discrete_treatment=is_discrete)
                                kernel_est = KernelDMLCateEstimator(
                                    model_y=WeightedLasso(),
                                    model_t=model_t,
                                    fit_cate_intercept=fit_cate_intercept,
                                    discrete_treatment=is_discrete)
                                estimator_cases = [
                                    (linear_est, True, all_infs),
                                    (sparse_est, True, [None, 'debiasedlasso']),
                                    (kernel_est, False, [None]),
                                ]

                                for est, multi, infs in estimator_cases:
                                    # estimators flagged multi=False are skipped for d_y > 1
                                    if d_y > 1 and not multi:
                                        continue

                                    for inf in infs:
                                        with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                          is_discrete=is_discrete,
                                                          est=est, inf=inf):
                                            check_estimator(est, inf)
コード例 #16
0
    def test_cate_api_nonparam(self):
        """Test that the non-parametric DML estimators correctly implement the CATE API.

        Random inputs are generated for each combination of treatment, outcome,
        feature and control dimensionality, with continuous and binary
        treatments. NonParamDMLCateEstimator (with and without a featurizer)
        and ForestDMLCateEstimator are fit under each inference option, and the
        shapes of their effect outputs and intervals are checked.
        """
        n = 20

        def make_random(is_discrete, d):
            # d=None means the argument is omitted; a negative d produces a
            # 1-D array of length n instead of an (n, d) array.
            if d is None:
                return None
            sz = (n, d) if d >= 0 else (n, )
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b'], size=sz)
                    # ensure that both labels are present and that each occurs
                    # more than twice (i.e. at least three times)
                    _, counts = np.unique(arr, return_counts=True)
                    if len(counts) == 2 and counts.min() > 2:
                        return arr
            else:
                return np.random.normal(size=sz)

        for d_t in [1, -1]:
            # d_t is never greater than 1 here, so both the discrete and the
            # continuous case are always exercised.
            for is_discrete in [True, False] if d_t <= 1 else [False]:
                for d_y in [3, 1, -1]:
                    for d_x in [2, None]:
                        for d_w in [2, None]:
                            # Only T may be discrete; W, X and Y are always continuous.
                            W, X, Y, T = [
                                make_random(is_discrete, d)
                                for is_discrete, d in [(
                                    False,
                                    d_w), (False,
                                           d_x), (False,
                                                  d_y), (is_discrete, d_t)]
                            ]

                            # NOTE(review): presumably the two-label discrete treatment is
                            # encoded as a single column -- confirm against the estimators.
                            d_t_final = 1 if is_discrete else d_t

                            effect_shape = (n, ) + ((d_y, ) if d_y > 0 else ())
                            marginal_effect_shape = ((n, ) + (
                                (d_y, ) if d_y > 0 else
                                ()) + ((d_t_final, ) if d_t_final > 0 else ()))

                            # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                            const_marginal_effect_shape = (
                                (n if d_x else 1, ) + ((d_y, ) if d_y > 0 else
                                                       ()) +
                                ((d_t_final, ) if d_t_final > 0 else ()))

                            model_t = LogisticRegression(
                            ) if is_discrete else WeightedLasso()

                            # TODO Add bootstrap inference, once discrete treatment issue is fixed
                            base_infs = [None]
                            if not is_discrete:
                                base_infs += [BootstrapInference(2)]
                            # Each entry is (estimator, multi, inference options); the forest
                            # estimator is additionally tried with 'blb' inference.
                            for est, multi, infs in [
                                (NonParamDMLCateEstimator(
                                    model_y=WeightedLasso(),
                                    model_t=model_t,
                                    model_final=WeightedLasso(),
                                    featurizer=None,
                                    discrete_treatment=is_discrete), True,
                                 base_infs),
                                (NonParamDMLCateEstimator(
                                    model_y=WeightedLasso(),
                                    model_t=model_t,
                                    model_final=WeightedLasso(),
                                    featurizer=FunctionTransformer(),
                                    discrete_treatment=is_discrete), True,
                                 base_infs),
                                (ForestDMLCateEstimator(
                                    model_y=WeightedLasso(),
                                    model_t=model_t,
                                    discrete_treatment=is_discrete), True,
                                 base_infs + ['blb'])
                            ]:

                                # multi is True for every entry above, so this guard
                                # never skips anything in the current list.
                                if not (multi) and d_y > 1:
                                    continue

                                for inf in infs:
                                    with self.subTest(d_w=d_w,
                                                      d_x=d_x,
                                                      d_y=d_y,
                                                      d_t=d_t,
                                                      is_discrete=is_discrete,
                                                      est=est,
                                                      inf=inf):
                                        # Fitting without X is expected to raise
                                        # AttributeError for all of these estimators.
                                        if X is None:
                                            with pytest.raises(AttributeError):
                                                est.fit(Y,
                                                        T,
                                                        X,
                                                        W,
                                                        inference=inf)
                                            continue

                                        est.fit(Y, T, X, W, inference=inf)
                                        # make sure we can call the marginal_effect and effect methods
                                        const_marg_eff = est.const_marginal_effect(
                                            X)
                                        marg_eff = est.marginal_effect(T, X)
                                        self.assertEqual(
                                            shape(marg_eff),
                                            marginal_effect_shape)
                                        self.assertEqual(
                                            shape(const_marg_eff),
                                            const_marginal_effect_shape)

                                        # The marginal effect must equal the constant marginal effect.
                                        np.testing.assert_array_equal(
                                            marg_eff if d_x else marg_eff[0:1],
                                            const_marg_eff)

                                        # Baseline treatment: label 'a' when discrete, zeros otherwise.
                                        T0 = np.full_like(
                                            T, 'a'
                                        ) if is_discrete else np.zeros_like(T)
                                        eff = est.effect(X, T0=T0, T1=T)
                                        self.assertEqual(
                                            shape(eff), effect_shape)

                                        # Interval outputs carry a leading axis of length 2.
                                        if inf is not None:
                                            const_marg_eff_int = est.const_marginal_effect_interval(
                                                X)
                                            marg_eff_int = est.marginal_effect_interval(
                                                T, X)
                                            self.assertEqual(
                                                shape(marg_eff_int),
                                                (2, ) + marginal_effect_shape)
                                            self.assertEqual(
                                                shape(const_marg_eff_int),
                                                (2, ) +
                                                const_marginal_effect_shape)
                                            self.assertEqual(
                                                shape(
                                                    est.effect_interval(X,
                                                                        T0=T0,
                                                                        T1=T)),
                                                (2, ) + effect_shape)

                                        est.score(Y, T, X, W)

                                        # make sure we can call effect with implied scalar treatments, no matter the
                                        # dimensions of T, and also that we warn when there are multiple treatments
                                        # (d_t only takes the values 1 and -1 in this test, so the warning
                                        # branch is never taken here)
                                        if d_t > 1:
                                            cm = self.assertWarns(Warning)
                                        else:
                                            cm = ExitStack(
                                            )  # ExitStack can be used as a "do nothing" ContextManager
                                        with cm:
                                            effect_shape2 = (
                                                n if d_x else 1, ) + (
                                                    (d_y, ) if d_y > 0 else ())
                                            eff = est.effect(
                                                X
                                            ) if not is_discrete else est.effect(
                                                X, T0='a', T1='b')
                                            self.assertEqual(
                                                shape(eff), effect_shape2)