예제 #1
0
 def test_parameter_passing(self):
     """Check that ``mc_iters`` and ``mc_agg`` given at construction are stored on the estimator.

     Each estimator family is built with ``mc_iters=2`` and ``mc_agg='median'``
     and the attributes of the same name are asserted to hold those values.
     """
     # DML variants that require an explicit final model.
     for gen in [DML, NonParamDML]:
         est = gen(model_y=LinearRegression(),
                   model_t=LinearRegression(),
                   model_final=LinearRegression(),
                   mc_iters=2,
                   mc_agg='median')
         assert est.mc_iters == 2
         assert est.mc_agg == 'median'
     # DML variants constructed here without a model_final argument.
     for gen in [LinearDML, SparseLinearDML, KernelDML, ForestDML]:
         est = gen(model_y=LinearRegression(),
                   model_t=LinearRegression(),
                   mc_iters=2,
                   mc_agg='median')
         assert est.mc_iters == 2
         assert est.mc_agg == 'median'
     # DR learners, constructed with only the two parameters under test.
     for gen in [
             DRLearner, LinearDRLearner, SparseLinearDRLearner,
             ForestDRLearner
     ]:
         est = gen(mc_iters=2, mc_agg='median')
         assert est.mc_iters == 2
         assert est.mc_agg == 'median'
     # IV estimators take differently named nuisance models, so they are built
     # inline.  The loop variable must be ``est``: binding it to another name
     # would leave the assertions checking the stale estimator from the
     # previous loop instead of these instances.
     for est in [
             DMLATEIV(model_Y_W=LinearRegression(),
                      model_T_W=LinearRegression(),
                      model_Z_W=LinearRegression(),
                      mc_iters=2,
                      mc_agg='median'),
             ProjectedDMLATEIV(model_Y_W=LinearRegression(),
                               model_T_W=LinearRegression(),
                               model_T_WZ=LinearRegression(),
                               mc_iters=2,
                               mc_agg='median'),
             DMLIV(model_Y_X=LinearRegression(),
                   model_T_X=LinearRegression(),
                   model_T_XZ=LinearRegression(),
                   model_final=LinearRegression(),
                   mc_iters=2,
                   mc_agg='median'),
             NonParamDMLIV(model_Y_X=LinearRegression(),
                           model_T_X=LinearRegression(),
                           model_T_XZ=LinearRegression(),
                           model_final=LinearRegression(),
                           mc_iters=2,
                           mc_agg='median'),
             IntentToTreatDRIV(model_Y_X=LinearRegression(),
                               model_T_XZ=LinearRegression(),
                               flexible_model_effect=LinearRegression(),
                               mc_iters=2,
                               mc_agg='median'),
             LinearIntentToTreatDRIV(
                 model_Y_X=LinearRegression(),
                 model_T_XZ=LinearRegression(),
                 flexible_model_effect=LinearRegression(),
                 mc_iters=2,
                 mc_agg='median')
     ]:
         assert est.mc_iters == 2
         assert est.mc_agg == 'median'
예제 #2
0
    def test_accuracy(self):
        """Check that the estimators' confidence intervals bracket the true effect.

        Data are simulated with a binary instrument and binary treatment, first
        with a constant effect (interval from ``ate_interval``) and then with an
        effect linear in the first feature (intervals from ``coef__interval`` and
        ``intercept__interval``).
        """
        np.random.seed(123)

        def dgp(n, p, true_fn):
            # The order of the random draws below is part of the seeded behaviour.
            X = np.random.normal(0, 1, size=(n, p))
            Z = np.random.binomial(1, 0.5, size=(n,))
            nu = np.random.uniform(0, 10, size=(n,))
            coef_Z = 0.8
            # Compliers when recommended
            comply_prob = coef_Z * special.expit(0.4 * X[:, 0] + nu)
            C = np.random.binomial(1, comply_prob)
            # Non-compliers when not recommended
            C0 = np.random.binomial(1, 0.06 * np.ones(X.shape[0]))
            T = C * Z + C0 * (1 - Z)
            y = (true_fn(X) * T + 2 * nu + 5 * (X[:, 3] > 0)
                 + 0.1 * np.random.uniform(0, 1, size=(n,)))
            return y, T, Z, X

        itt_driv = LinearIntentToTreatDRIV(
            flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
            fit_cate_intercept=True,
        )
        linear_driv = LinearDRIV(
            fit_cate_intercept=True,
            projection=False,
            discrete_instrument=True,
            discrete_treatment=True,
            flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
        )
        estimators = [itt_driv, linear_driv]

        n_samples = 1000
        n_features = 10

        # --- constant effect: every feature is passed as a control (W) ---
        true_ate = 10

        def constant_effect(X):
            return true_ate

        y, T, Z, X = dgp(n_samples, n_features, constant_effect)
        for est in estimators:
            with self.subTest(est=est):
                est.fit(y, T, Z=Z, X=None, W=X, inference="auto")
                lower, upper = est.ate_interval()
                np.testing.assert_array_less(lower, true_ate)
                np.testing.assert_array_less(true_ate, upper)

        # --- heterogeneous effect: first column is X, the rest are controls ---
        true_coef = 10

        def linear_effect(X):
            return true_coef * X[:, 0]

        y, T, Z, X = dgp(n_samples, n_features, linear_effect)
        for est in estimators:
            with self.subTest(est=est):
                est.fit(y, T, Z=Z, X=X[:, [0]], W=X[:, 1:], inference="auto")
                coef_bounds = est.coef__interval()
                intercept_bounds = est.intercept__interval(alpha=0.05)
                coef_lo, coef_hi = coef_bounds
                icpt_lo, icpt_hi = intercept_bounds
                np.testing.assert_array_less(coef_lo, true_coef)
                np.testing.assert_array_less(true_coef, coef_hi)
                np.testing.assert_array_less(icpt_lo, 0)
                np.testing.assert_array_less(0, icpt_hi)
예제 #3
0
 def test_orthoiv(self):
     """Fit LinearIntentToTreatDRIV on the shared DataFrame and check that names propagate."""
     fixture = TestPandasIntegration
     X = fixture.df[fixture.features]
     Y = fixture.df[fixture.outcome]
     T = fixture.df[fixture.bin_treat]
     Z = fixture.df[fixture.instrument]

     # Test LinearIntentToTreatDRIV
     outcome_model = GradientBoostingRegressor()
     treatment_model = GradientBoostingClassifier()
     effect_model = GradientBoostingRegressor()
     est = LinearIntentToTreatDRIV(model_y_xw=outcome_model,
                                   model_t_xwz=treatment_model,
                                   flexible_model_effect=effect_model)
     est.fit(Y, T, Z=Z, X=X, inference='statsmodels')

     # Exercise the point and interval APIs; their results are not inspected.
     est.effect(X)
     est.effect_interval(X, alpha=0.05)

     # Check input names propagate
     summary = est.summary()
     self._check_input_names(summary)
     population_summary = est.effect_inference(X).population_summary()
     self._check_popsum_names(population_summary)
예제 #4
0
 def test_stratify_orthoiv(self):
     """Fit on a tiny sample with bootstrap inference and request an effect interval.

     Per the original intent, this exercises stratification by
     treatment/instrument pair.
     """
     Y = [1, 2, 3, 4, 5, 6, 7, 8]
     T = [1, 0, 1, 1, 0, 0, 1, 0]
     Z = [1, 0, 0, 1, 0, 1, 0, 1]
     feature_values = np.array([1, 1, 2, 2, 1, 2, 1, 2])
     X = feature_values.reshape(-1, 1)

     est = LinearIntentToTreatDRIV(
         model_Y_X=LinearRegression(),
         model_T_XZ=LogisticRegression(),
         flexible_model_effect=LinearRegression(),
         cv=2,
     )
     bootstrap = BootstrapInference(
         n_bootstrap_samples=20,
         n_jobs=-1,
         verbose=3,
     )
     est.fit(Y, T, Z=Z, X=X, inference=bootstrap)
     est.const_marginal_effect_interval(X)
예제 #5
0
    def test_can_use_statsmodel_inference(self):
        """Check the confidence intervals produced after a default-inference fit.

        For effects, constant marginal effects, coefficients and the intercept,
        the interval must be a (lower, upper) pair that brackets the point
        estimate and has nonzero width somewhere.
        """

        def check_interval(interval, point, compare_shapes=True):
            assert len(interval) == 2
            lo, hi = interval
            if compare_shapes:
                assert lo.shape == hi.shape == point.shape
            assert np.all(lo <= point)
            assert np.all(point <= hi)
            # for at least some of the examples, the CI should have nonzero width
            assert np.any(lo < hi)

        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                      model_T_XZ=LogisticRegression(C=1000),
                                      flexible_model_effect=WeightedLasso())
        y_train = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        t_train = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        z_train = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
        x_train = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
        est.fit(y_train, t_train, Z=z_train, X=x_train)

        x_query = np.ones((9, 1))
        base_t = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1])
        target_t = np.array([1, 2, 1, 1, 2, 2, 2, 2, 1])

        # effect between explicit treatment pairs
        check_interval(
            est.effect_interval(x_query, T0=base_t, T1=target_t, alpha=0.05),
            est.effect(x_query, T0=base_t, T1=target_t))

        # constant marginal effect
        check_interval(
            est.const_marginal_effect_interval(x_query, alpha=0.05),
            est.const_marginal_effect(x_query))

        # final-model coefficients
        check_interval(est.coef__interval(alpha=0.05), est.coef_)

        # final-model intercept (shapes are not compared for this one)
        check_interval(est.intercept__interval(alpha=0.05),
                       est.intercept_,
                       compare_shapes=False)
예제 #6
0
    def test_access_to_internal_models(self):
        """Check the accessors for nuisance models, the final model and the featurizer.

        The estimator is fitted once with a polynomial featurizer and once with
        ``featurizer=None``; the exposed attributes and the CATE feature names
        are asserted in both cases.
        """

        def check_models(fitted):
            assert isinstance(fitted.model_final_, StatsModelsLinearRegression)
            for outcome_model in fitted.models_Y_X:
                assert isinstance(outcome_model, LinearRegression)
            for treatment_model in fitted.models_T_XZ:
                assert isinstance(treatment_model, LogisticRegression)

        Y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        T = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
        Z = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
        X = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)

        # --- with a user-supplied featurizer ---
        poly = PolynomialFeatures(degree=2, include_bias=False)
        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                      model_T_XZ=LogisticRegression(C=1000),
                                      flexible_model_effect=WeightedLasso(),
                                      featurizer=poly)
        est.fit(Y, T, Z=Z, X=X)
        assert isinstance(est.original_featurizer, PolynomialFeatures)
        assert isinstance(est.featurizer_, Pipeline)
        check_models(est)
        named = est.cate_feature_names(['A'])
        np.testing.assert_array_equal(named, ['A', 'A^2'])
        default_named = est.cate_feature_names()
        np.testing.assert_array_equal(default_named, ['x0', 'x0^2'])

        # --- without a featurizer ---
        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                      model_T_XZ=LogisticRegression(C=1000),
                                      flexible_model_effect=WeightedLasso(),
                                      featurizer=None)
        est.fit(Y, T, Z=Z, X=X)
        assert est.original_featurizer is None
        assert isinstance(est.featurizer_, FunctionTransformer)
        check_models(est)
        named = est.cate_feature_names(['A'])
        np.testing.assert_array_equal(named, ['A'])
예제 #7
0
    def test_cate_api(self):
        """Test that we correctly implement the CATE API.

        Sweeps over treatment / outcome / feature / instrument dimensions and
        over discrete vs. continuous T and Z.  For every estimator and inference
        option that applies, it fits, pickles, and checks the shapes returned by
        the effect, marginal-effect and interval methods.
        """
        n = 30

        def size(n, d):
            # a negative d denotes a 1-D array of length n
            return (n, d) if d >= 0 else (n, )

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = size(n, d)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every row
                    _, counts = np.unique(arr, return_counts=True, axis=0)
                    if len(counts) == 3**(d if d > 0 else 1) and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        def eff_shape(n, d_y):
            return (n, ) + ((d_y, ) if d_y > 0 else ())

        def marg_eff_shape(n, d_y, d_t_final):
            return ((n, ) + ((d_y, ) if d_y > 0 else ()) +
                    ((d_t_final, ) if d_t_final > 0 else ()))

        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
        def const_marg_eff_shape(n, d_x, d_y, d_t_final):
            return ((n if d_x else 1, ) + ((d_y, ) if d_y > 0 else ()) +
                    ((d_t_final, ) if d_t_final > 0 else ()))

        for d_t in [2, 1, -1]:
            n_t = d_t if d_t > 0 else 1
            for discrete_t in [True, False] if n_t == 1 else [False]:
                for d_y in [3, 1, -1]:
                    for d_q in [2, None]:
                        for d_z in [2, 1]:
                            if d_z < n_t:
                                continue
                            for discrete_z in [True, False] if d_z == 1 else [False]:
                                Z1, Q, Y, T1 = [
                                    make_random(is_discrete, d)
                                    for is_discrete, d in [(discrete_z, d_z),
                                                           (False, d_q),
                                                           (False, d_y),
                                                           (discrete_t, d_t)]
                                ]
                                if discrete_t and discrete_z:
                                    # need to make sure we get all *joint* combinations:
                                    # draw a two-column array in which every row
                                    # pattern occurs at least twice, then use one
                                    # column for Z and the *other* for T (taking the
                                    # same column for both would make T identical to Z)
                                    arr = make_random(True, 2)
                                    Z1 = arr[:, 0].reshape(size(n, d_z))
                                    T1 = arr[:, 1].reshape(size(n, d_t))

                                d_t_final1 = 2 if discrete_t else d_t

                                if discrete_t:
                                    # IntentToTreat only supports binary treatments/instruments
                                    T2 = T1.copy()
                                    T2[T1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(T1 == 'c'))
                                    d_t_final2 = 1
                                if discrete_z:
                                    # IntentToTreat only supports binary treatments/instruments
                                    Z2 = Z1.copy()
                                    Z2[Z1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(Z1 == 'c'))

                                effect_shape = eff_shape(n, d_y)

                                model_t = LogisticRegression() if discrete_t else Lasso()
                                model_z = LogisticRegression() if discrete_z else Lasso()

                                all_infs = [None, BootstrapInference(1)]

                                # each entry: (estimator, supports multi-dim Y, inference options)
                                estimators = [
                                    (DMLATEIV(model_Y_W=Lasso(),
                                              model_T_W=model_t,
                                              model_Z_W=model_z,
                                              discrete_treatment=discrete_t,
                                              discrete_instrument=discrete_z),
                                     True, all_infs),
                                    (ProjectedDMLATEIV(model_Y_W=Lasso(),
                                                       model_T_W=model_t,
                                                       model_T_WZ=model_t,
                                                       discrete_treatment=discrete_t,
                                                       discrete_instrument=discrete_z),
                                     False, all_infs),
                                    (DMLIV(model_Y_X=Lasso(),
                                           model_T_X=model_t,
                                           model_T_XZ=model_t,
                                           model_final=Lasso(),
                                           discrete_treatment=discrete_t,
                                           discrete_instrument=discrete_z),
                                     False, all_infs)
                                ]

                                if d_q and discrete_t and discrete_z:
                                    # IntentToTreat requires X
                                    estimators.append((LinearIntentToTreatDRIV(
                                        model_Y_X=Lasso(),
                                        model_T_XZ=model_t,
                                        flexible_model_effect=WeightedLasso(),
                                        cv=2), False, all_infs + ['auto']))

                                for est, multi, infs in estimators:
                                    # ``and`` binds tighter than ``or``; the grouping is
                                    # written out so the skip rule is unambiguous
                                    if (not multi and d_y > 1) or d_t > 1 or d_z > 1:
                                        continue

                                    # ensure we can serialize unfit estimator
                                    pickle.dumps(est)

                                    d_ws = [None]
                                    if isinstance(est, LinearIntentToTreatDRIV):
                                        d_ws.append(2)

                                    for d_w in d_ws:
                                        W = make_random(False, d_w)

                                        for inf in infs:
                                            with self.subTest(d_z=d_z,
                                                              d_x=d_q,
                                                              d_y=d_y,
                                                              d_t=d_t,
                                                              discrete_t=discrete_t,
                                                              discrete_z=discrete_z,
                                                              est=est,
                                                              inf=inf):
                                                Z = Z1
                                                T = T1
                                                d_t_final = d_t_final1
                                                X = Q
                                                d_x = d_q

                                                if isinstance(est, (DMLATEIV, ProjectedDMLATEIV)):
                                                    # these support only W but not X
                                                    W = Q
                                                    X = None
                                                    d_x = None

                                                    def fit():
                                                        return est.fit(Y,
                                                                       T,
                                                                       Z=Z,
                                                                       W=W,
                                                                       inference=inf)

                                                    def score():
                                                        return est.score(Y, T, Z=Z, W=W)
                                                else:
                                                    # these support only binary, not general discrete T and Z
                                                    if discrete_t:
                                                        T = T2
                                                        d_t_final = d_t_final2

                                                    if discrete_z:
                                                        Z = Z2

                                                    if isinstance(est, LinearIntentToTreatDRIV):

                                                        def fit():
                                                            return est.fit(Y,
                                                                           T,
                                                                           Z=Z,
                                                                           X=X,
                                                                           W=W,
                                                                           inference=inf)

                                                        def score():
                                                            return est.score(Y,
                                                                             T,
                                                                             Z=Z,
                                                                             X=X,
                                                                             W=W)
                                                    else:

                                                        def fit():
                                                            return est.fit(Y,
                                                                           T,
                                                                           Z=Z,
                                                                           X=X,
                                                                           inference=inf)

                                                        def score():
                                                            return est.score(Y, T, Z=Z, X=X)

                                                marginal_effect_shape = marg_eff_shape(
                                                    n, d_y, d_t_final)
                                                const_marginal_effect_shape = const_marg_eff_shape(
                                                    n, d_x, d_y, d_t_final)

                                                fit()

                                                # ensure we can serialize fit estimator
                                                pickle.dumps(est)

                                                # make sure we can call the marginal_effect and effect methods
                                                const_marg_eff = est.const_marginal_effect(X)
                                                marg_eff = est.marginal_effect(T, X)
                                                self.assertEqual(shape(marg_eff),
                                                                 marginal_effect_shape)
                                                self.assertEqual(shape(const_marg_eff),
                                                                 const_marginal_effect_shape)

                                                # without X the constant marginal effect has a
                                                # single row, so compare against the first row only
                                                np.testing.assert_array_equal(
                                                    marg_eff if d_x else marg_eff[0:1],
                                                    const_marg_eff)

                                                T0 = (np.full_like(T, 'a')
                                                      if discrete_t else np.zeros_like(T))
                                                eff = est.effect(X, T0=T0, T1=T)
                                                self.assertEqual(shape(eff), effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                if inf is not None:
                                                    const_marg_eff_int = est.const_marginal_effect_interval(X)
                                                    marg_eff_int = est.marginal_effect_interval(T, X)
                                                    self.assertEqual(
                                                        shape(marg_eff_int),
                                                        (2, ) + marginal_effect_shape)
                                                    self.assertEqual(
                                                        shape(const_marg_eff_int),
                                                        (2, ) + const_marginal_effect_shape)
                                                    self.assertEqual(
                                                        shape(est.effect_interval(X, T0=T0, T1=T)),
                                                        (2, ) + effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                score()

                                                # make sure we can call effect with implied scalar treatments,
                                                # no matter the dimensions of T, and also that we warn when there
                                                # are multiple treatments
                                                if d_t > 1:
                                                    cm = self.assertWarns(Warning)
                                                else:
                                                    # ExitStack can be used as a "do nothing" ContextManager
                                                    cm = ExitStack()
                                                with cm:
                                                    effect_shape2 = ((n if d_x else 1, ) +
                                                                     ((d_y, ) if d_y > 0 else ()))
                                                    eff = (est.effect(X) if not discrete_t
                                                           else est.effect(X, T0='a', T1='b'))
                                                    self.assertEqual(shape(eff), effect_shape2)
예제 #8
0
 def test_orthoiv_random_state(self):
     """Run the shared random-state check on each IV estimator with seeded forest nuisance models.

     The treatment array is also passed as the instrument (``Z=T``).
     """

     def forest_regressor():
         # fresh instance per call so estimators never share a model object
         return RandomForestRegressor(n_estimators=10,
                                      max_depth=4,
                                      random_state=123)

     def forest_classifier():
         return RandomForestClassifier(n_estimators=10,
                                       max_depth=4,
                                       random_state=123)

     Y, T, X, W, X_test = self._make_data(500, 2)

     discrete = dict(discrete_treatment=True, discrete_instrument=True)
     seeded_cv = dict(cv=2, random_state=123)

     candidates = [
         OrthoIV(model_y_xw=forest_regressor(),
                 model_t_xw=forest_classifier(),
                 model_z_xw=forest_classifier(),
                 **discrete,
                 **seeded_cv),
         NonParamDMLIV(model_y_xw=forest_regressor(),
                       model_t_xw=forest_classifier(),
                       model_t_xwz=forest_classifier(),
                       model_final=LinearRegression(),
                       **discrete,
                       **seeded_cv),
         LinearDRIV(model_y_xw=forest_regressor(),
                    model_t_xw=forest_classifier(),
                    model_z_xw=forest_classifier(),
                    model_tz_xw=forest_classifier(),
                    flexible_model_effect=StatsModelsLinearRegression(
                        fit_intercept=False),
                    **discrete,
                    **seeded_cv),
         IntentToTreatDRIV(model_y_xw=forest_regressor(),
                           model_t_xwz=forest_classifier(),
                           flexible_model_effect=forest_regressor(),
                           **seeded_cv),
         LinearIntentToTreatDRIV(model_y_xw=forest_regressor(),
                                 model_t_xwz=forest_classifier(),
                                 flexible_model_effect=forest_regressor(),
                                 **seeded_cv),
     ]

     for est in candidates:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W, Z=T)
예제 #9
0
 def test_orthoiv_random_state(self):
     """Run the shared random-state check over the OrthoIV-family estimators.

     Each estimator is built with seeded random-forest nuisance models and
     ``random_state=123`` and is then passed to
     ``TestRandomState._test_random_state``. The treatment ``T`` doubles as
     the instrument ``Z`` in every call.
     """
     Y, T, X, W, X_test = self._make_data(500, 2)

     def seeded_regressor():
         # A new instance per call, so no two nuisance slots share a model.
         return RandomForestRegressor(n_estimators=10,
                                      max_depth=4,
                                      random_state=123)

     def seeded_classifier():
         # Classifier counterpart of ``seeded_regressor``.
         return RandomForestClassifier(n_estimators=10,
                                       max_depth=4,
                                       random_state=123)

     # Options shared by the estimators that take discrete T and Z flags.
     discrete_cfg = dict(discrete_treatment=True,
                         discrete_instrument=True,
                         cv=2,
                         random_state=123)
     # The intent-to-treat estimators only receive cv and random_state.
     itt_cfg = dict(cv=2, random_state=123)

     # Group 1: fitted with W only; None is passed where the other groups
     # pass X_test.
     ate_estimators = [
         DMLATEIV(model_Y_W=seeded_regressor(),
                  model_T_W=seeded_classifier(),
                  model_Z_W=seeded_classifier(),
                  **discrete_cfg),
         ProjectedDMLATEIV(model_Y_W=seeded_regressor(),
                           model_T_W=seeded_classifier(),
                           model_T_WZ=seeded_classifier(),
                           **discrete_cfg),
     ]
     for est in ate_estimators:
         TestRandomState._test_random_state(est, None, Y, T, W=W, Z=T)

     # Group 2: fitted with X only.
     dmliv_estimators = [
         DMLIV(model_Y_X=seeded_regressor(),
               model_T_X=seeded_classifier(),
               model_T_XZ=seeded_classifier(),
               model_final=LinearRegression(fit_intercept=False),
               **discrete_cfg),
         NonParamDMLIV(model_Y_X=seeded_regressor(),
                       model_T_X=seeded_classifier(),
                       model_T_XZ=seeded_classifier(),
                       model_final=LinearRegression(),
                       **discrete_cfg),
     ]
     for est in dmliv_estimators:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, Z=T)

     # Group 3: fitted with both X and W.
     itt_estimators = [
         IntentToTreatDRIV(model_Y_X=seeded_regressor(),
                           model_T_XZ=seeded_classifier(),
                           flexible_model_effect=seeded_regressor(),
                           **itt_cfg),
         LinearIntentToTreatDRIV(model_Y_X=seeded_regressor(),
                                 model_T_XZ=seeded_classifier(),
                                 flexible_model_effect=seeded_regressor(),
                                 **itt_cfg),
     ]
     for est in itt_estimators:
         TestRandomState._test_random_state(est,
                                            X_test,
                                            Y,
                                            T,
                                            X=X,
                                            W=W,
                                            Z=T)
예제 #10
0
    def test_cate_api(self):
        """Smoke-test the CATE API of the DRIV estimators over a settings grid.

        For every combination of controls (``d_w``), features (``d_x``),
        binary or continuous treatment and instrument, ``projection`` flag
        and featurizer, each estimator is pickled, fitted and pickled again.
        The shapes of its effect and interval outputs are then compared with
        the expected shapes. ``score``, ``cate_feature_names`` and
        ``shap_values`` are exercised as well.
        """
        # Imported locally because the module-level imports lie outside this
        # block.
        import itertools

        def const_marg_eff_shape(n, d_x, binary_T):
            """Expected shape of the ``const_marginal_effect`` output."""
            return (n if d_x else 1, ) + ((1, ) if binary_T else ())

        def marg_eff_shape(n, binary_T):
            """Expected shape of the ``marginal_effect`` output."""
            return (n, ) + ((1, ) if binary_T else ())

        def eff_shape(n, d_x):
            """Expected shape of the ``effect`` output."""
            return (n if d_x else 1, )

        def flexible_effect():
            # Every estimator gets its own flexible effect model instance.
            return StatsModelsLinearRegression(fit_intercept=False)

        n = 1000
        y = np.random.normal(size=(n, ))

        # One flat loop over the whole grid instead of nested loops.
        param_grid = itertools.product(
            [None, 10],  # d_w
            [None, 3],  # d_x
            [True, False],  # binary_T
            [True, False],  # binary_Z
            [True, False],  # projection
            [None, PolynomialFeatures(degree=2,
                                      include_bias=False)],  # featurizer
        )
        for d_w, d_x, binary_T, binary_Z, projection, featurizer in param_grid:
            # Fresh random data for each combination.
            W = None if d_w is None else np.random.normal(size=(n, d_w))
            X = None if d_x is None else np.random.normal(size=(n, d_x))
            if binary_T:
                T = np.random.choice(["a", "b"], size=(n, ))
            else:
                T = np.random.normal(size=(n, ))
            if binary_Z:
                Z = np.random.choice(["c", "d"], size=(n, ))
            else:
                Z = np.random.normal(size=(n, ))

            # Keyword arguments common to the four DRIV variants.
            shared = dict(projection=projection,
                          discrete_instrument=binary_Z,
                          discrete_treatment=binary_T,
                          featurizer=featurizer)
            est_list = [
                DRIV(flexible_model_effect=flexible_effect(),
                     model_final=StatsModelsLinearRegression(
                         fit_intercept=False),
                     fit_cate_intercept=True,
                     **shared),
                LinearDRIV(flexible_model_effect=flexible_effect(),
                           fit_cate_intercept=True,
                           **shared),
                SparseLinearDRIV(flexible_model_effect=flexible_effect(),
                                 fit_cate_intercept=True,
                                 **shared),
            ]

            # ForestDRIV is only exercised when features are supplied.
            if X is not None:
                est_list.append(
                    ForestDRIV(flexible_model_effect=flexible_effect(),
                               **shared))

            # The intent-to-treat estimators are only added when both the
            # treatment and the instrument are binary.
            if binary_T and binary_Z:
                est_list += [
                    IntentToTreatDRIV(
                        flexible_model_effect=flexible_effect(),
                        fit_cate_intercept=True,
                        featurizer=featurizer,
                    ),
                    LinearIntentToTreatDRIV(
                        flexible_model_effect=flexible_effect(),
                        featurizer=featurizer,
                    ),
                ]

            for est in est_list:
                with self.subTest(d_w=d_w,
                                  d_x=d_x,
                                  binary_T=binary_T,
                                  binary_Z=binary_Z,
                                  projection=projection,
                                  featurizer=featurizer,
                                  est=est):

                    # ensure we can serialize unfit estimator
                    pickle.dumps(est)

                    est.fit(y, T, Z=Z, X=X, W=W)

                    # ensure we can serialize fit estimator
                    pickle.dumps(est)

                    # expected effect size
                    const_marginal_effect_shape = const_marg_eff_shape(
                        n, d_x, binary_T)
                    marginal_effect_shape = marg_eff_shape(n, binary_T)
                    effect_shape = eff_shape(n, d_x)

                    # test effect
                    const_marg_eff = est.const_marginal_effect(X)
                    self.assertEqual(shape(const_marg_eff),
                                     const_marginal_effect_shape)
                    marg_eff = est.marginal_effect(T, X)
                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                    T0 = "a" if binary_T else 0
                    T1 = "b" if binary_T else 1
                    eff = est.effect(X, T0=T0, T1=T1)
                    self.assertEqual(shape(eff), effect_shape)

                    # test inference: the asserts below expect a leading
                    # axis of length 2 on every interval output
                    const_marg_eff_int = est.const_marginal_effect_interval(X)
                    marg_eff_int = est.marginal_effect_interval(T, X)
                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
                    self.assertEqual(shape(const_marg_eff_int),
                                     (2, ) + const_marginal_effect_shape)
                    self.assertEqual(shape(marg_eff_int),
                                     (2, ) + marginal_effect_shape)
                    self.assertEqual(shape(eff_int), (2, ) + effect_shape)

                    # test can run score
                    est.score(y, T, Z=Z, X=X, W=W)

                    if X is not None:
                        # test cate_feature_names
                        expect_feat_len = featurizer.fit(
                            X).n_output_features_ if featurizer else d_x
                        self.assertEqual(len(est.cate_feature_names()),
                                         expect_feat_len)

                        # test can run shap values (result is not inspected)
                        est.shap_values(X[:10])
예제 #11
0
    def test_cate_api(self):
        """Run the DRIV-family estimators through the CATE API on a settings grid.

        Each estimator is pickled, fitted and pickled again. Its effect,
        marginal-effect and interval outputs are then checked against the
        expected shapes, and ``score``, ``cate_feature_names`` and
        ``shap_values`` are called.
        """
        def const_marg_eff_shape(n, d_x, binary_T):
            """Expected shape of the constant marginal effect."""
            rows = n if d_x else 1
            return (rows, 1) if binary_T else (rows,)

        def marg_eff_shape(n, binary_T):
            """Expected shape of the marginal effect."""
            return (n, 1) if binary_T else (n,)

        def eff_shape(n, d_x):
            """Expected shape of the effect."""
            return (n,) if d_x else (1,)

        n = 500
        y = np.random.normal(size=(n,))

        # Cartesian grid of configurations to test.
        grid = itertools.product(
            [None, 10],     # d_w
            [None, 3],      # d_x
            [True, False],  # binary_T
            [True, False],  # binary_Z
            [True, False],  # projection
            [None, PolynomialFeatures(degree=2, include_bias=False)],  # featurizer
        )
        for d_w, d_x, binary_T, binary_Z, projection, featurizer in grid:
            # Draw order (W, X, T, Z) is kept so the random stream is consumed
            # in the same sequence.
            W = np.random.normal(size=(n, d_w)) if d_w is not None else None
            X = np.random.normal(size=(n, d_x)) if d_x is not None else None
            T = (np.random.choice(["a", "b"], size=(n,)) if binary_T
                 else np.random.normal(size=(n,)))
            Z = (np.random.choice(["c", "d"], size=(n,)) if binary_Z
                 else np.random.normal(size=(n,)))

            # Options common to the four DRIV variants.
            iv_options = {
                "projection": projection,
                "discrete_instrument": binary_Z,
                "discrete_treatment": binary_T,
                "featurizer": featurizer,
            }
            est_list = [
                DRIV(flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                     model_final=StatsModelsLinearRegression(fit_intercept=False),
                     fit_cate_intercept=True,
                     **iv_options),
                LinearDRIV(flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                           fit_cate_intercept=True,
                           **iv_options),
                SparseLinearDRIV(flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                                 fit_cate_intercept=True,
                                 **iv_options),
                ForestDRIV(flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                           **iv_options),
            ]

            if X is None:
                # ForestDRIV (the last entry) is left out when there are no features.
                est_list.pop()

            if binary_T and binary_Z:
                # Intent-to-treat variants are only added for binary T and Z.
                est_list.extend([
                    IntentToTreatDRIV(
                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                        fit_cate_intercept=True,
                        featurizer=featurizer,
                    ),
                    LinearIntentToTreatDRIV(
                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                        featurizer=featurizer,
                    ),
                ])

            for est in est_list:
                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
                                  projection=projection, featurizer=featurizer, est=est):

                    # TODO: is a pickle round for every combination really needed?
                    # The unfit estimator must be serializable ...
                    pickle.dumps(est)

                    est.fit(y, T, Z=Z, X=X, W=W)

                    # ... and so must the fitted one.
                    pickle.dumps(est)

                    # Shapes the outputs are expected to have.
                    want_const_marg = const_marg_eff_shape(n, d_x, binary_T)
                    want_marg = marg_eff_shape(n, binary_T)
                    want_eff = eff_shape(n, d_x)

                    # Constant marginal effect.
                    got_const_marg = est.const_marginal_effect(X)
                    self.assertEqual(shape(got_const_marg), want_const_marg)

                    # Marginal effect.
                    got_marg = est.marginal_effect(T, X)
                    self.assertEqual(shape(got_marg), want_marg)

                    # Effect between the two treatment levels.
                    T0, T1 = ("a", "b") if binary_T else (0, 1)
                    got_eff = est.effect(X, T0=T0, T1=T1)
                    self.assertEqual(shape(got_eff), want_eff)

                    # Inference: the asserts expect a leading axis of length 2.
                    const_marg_interval = est.const_marginal_effect_interval(X)
                    marg_interval = est.marginal_effect_interval(T, X)
                    eff_interval = est.effect_interval(X, T0=T0, T1=T1)
                    self.assertEqual(shape(const_marg_interval), (2,) + want_const_marg)
                    self.assertEqual(shape(marg_interval), (2,) + want_marg)
                    self.assertEqual(shape(eff_interval), (2,) + want_eff)

                    # Scoring must run.
                    est.score(y, T, Z=Z, X=X, W=W)

                    if X is not None:
                        # Feature-name count: featurizer output width, or d_x
                        # when no featurizer is used.
                        if featurizer:
                            n_expected_feats = featurizer.fit(X).n_output_features_
                        else:
                            n_expected_feats = d_x
                        self.assertEqual(len(est.cate_feature_names()), n_expected_feats)

                        # SHAP values must be computable; the result is discarded.
                        est.shap_values(X[:10])
예제 #12
0
    def test_orthoiv(self):
        """Check that attribute changes made after a fit are picked up by later fits/refits.

        Covers OrthoIV, DMLIV, NonParamDMLIV, IntentToTreatDRIV and
        LinearIntentToTreatDRIV. The instrument ``Z`` is a copy of the
        treatment ``T``.
        """
        y, T, X, W = self._get_data()
        Z = T.copy()

        # --- OrthoIV: swap all three nuisance models, then refit ---
        est = OrthoIV(model_y_xw=LinearRegression(),
                      model_t_xw=LinearRegression(),
                      model_z_xw=LinearRegression(),
                      mc_iters=2)
        est.fit(y, T, Z=Z, W=W, cache_values=True)
        # Smoke check: refit_final() directly after a cached fit.
        est.refit_final()
        est.model_y_xw = Lasso()
        est.model_t_xw = ElasticNet()
        est.model_z_xw = WeightedLasso()
        est.fit(y, T, Z=Z, W=W, cache_values=True)
        # The fitted nuisance wrappers must now hold the replacement models.
        assert isinstance(est.models_nuisance_[0][0]._model_y_xw._model, Lasso)
        assert isinstance(est.models_nuisance_[0][0]._model_t_xw._model,
                          ElasticNet)
        assert isinstance(est.models_nuisance_[0][0]._model_z_xw._model,
                          WeightedLasso)

        # --- DMLIV: same nuisance-swap check, this time with X ---
        est = DMLIV(model_y_xw=LinearRegression(),
                    model_t_xw=LinearRegression(),
                    model_t_xwz=LinearRegression(),
                    model_final=LinearRegression(fit_intercept=False),
                    mc_iters=2)
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        est.model_y_xw = Lasso()
        est.model_t_xw = ElasticNet()
        est.model_t_xwz = WeightedLasso()
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        assert isinstance(est.models_nuisance_[0][0]._model_y_xw._model, Lasso)
        assert isinstance(est.models_nuisance_[0][0]._model_t_xw._model,
                          ElasticNet)
        assert isinstance(est.models_nuisance_[0][0]._model_t_xwz._model,
                          WeightedLasso)

        # --- NonParamDMLIV: change featurizer and final model, refit final stage only ---
        est = NonParamDMLIV(model_y_xw=LinearRegression(),
                            model_t_xw=LinearRegression(),
                            model_t_xwz=LinearRegression(),
                            model_final=LinearRegression(fit_intercept=True),
                            mc_iters=2)
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.model_final = WeightedLasso()
        est.refit_final()
        assert isinstance(est.model_cate, WeightedLasso)
        assert isinstance(est.featurizer_, PolynomialFeatures)

        # --- IntentToTreatDRIV: final model follows flexible_model_effect while model_final is None ---
        est = IntentToTreatDRIV(model_y_xw=LinearRegression(),
                                model_t_xwz=LogisticRegression(),
                                flexible_model_effect=LinearRegression())
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        assert est.model_final is None
        assert isinstance(est.model_final_, LinearRegression)
        est.flexible_model_effect = Lasso()
        est.refit_final()
        assert est.model_final is None
        assert isinstance(est.model_final_, Lasso)
        est.model_final = Lasso()
        est.refit_final()
        assert isinstance(est.model_final, Lasso)
        assert isinstance(est.model_final_, Lasso)
        # refit_final() alone leaves the nuisance-stage preliminary effect
        # model untouched: it is still the LinearRegression from the first fit.
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_,
            LinearRegression)
        # A full fit is what propagates the new flexible_model_effect to it.
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_, Lasso)

        # --- LinearIntentToTreatDRIV: intercept availability and read-only model_final ---
        est = LinearIntentToTreatDRIV(model_y_xw=LinearRegression(),
                                      model_t_xwz=LogisticRegression(),
                                      flexible_model_effect=LinearRegression())
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        est.fit_cate_intercept = False
        # These accesses are expected to still succeed here; only after
        # refit_final() are they asserted to raise AttributeError.
        est.intercept_
        est.intercept__interval()
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        with pytest.raises(AttributeError):
            est.intercept__interval()
        # Assigning model_final on this estimator is expected to be rejected.
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.flexible_model_effect = Lasso()
        est.fit(y, T, Z=Z, X=X, W=W, cache_values=True)
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_, Lasso)
예제 #13
0
    def test_orthoiv(self):
        """Check that attribute changes made after a fit are picked up by later fits/refits.

        Covers DMLATEIV, ProjectedDMLATEIV, DMLIV (twice, asserting through
        two different accessors), NonParamDMLIV, IntentToTreatDRIV and
        LinearIntentToTreatDRIV. The instrument ``Z`` is a copy of the
        treatment ``T``.
        """
        y, T, X, W = self._get_data()
        Z = T.copy()

        # --- DMLATEIV: swap all three nuisance models, then refit ---
        est = DMLATEIV(model_Y_W=LinearRegression(),
                       model_T_W=LinearRegression(),
                       model_Z_W=LinearRegression(),
                       mc_iters=2)
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        # Smoke check: refit_final() directly after a cached fit.
        est.refit_final()
        est.model_Y_W = Lasso()
        est.model_T_W = ElasticNet()
        est.model_Z_W = WeightedLasso()
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        # The fitted nuisance wrappers must now hold the replacement models.
        assert isinstance(est.models_nuisance_[0][0]._model_Y_W._model, Lasso)
        assert isinstance(est.models_nuisance_[0][0]._model_T_W._model,
                          ElasticNet)
        assert isinstance(est.models_nuisance_[0][0]._model_Z_W._model,
                          WeightedLasso)

        # --- ProjectedDMLATEIV: same nuisance-swap check ---
        est = ProjectedDMLATEIV(model_Y_W=LinearRegression(),
                                model_T_W=LinearRegression(),
                                model_T_WZ=LinearRegression(),
                                mc_iters=2)
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        est.refit_final()
        est.model_Y_W = Lasso()
        est.model_T_W = ElasticNet()
        est.model_T_WZ = WeightedLasso()
        est.fit(y, T, W=W, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0][0]._model_Y_W._model, Lasso)
        assert isinstance(est.models_nuisance_[0][0]._model_T_W._model,
                          ElasticNet)
        assert isinstance(est.models_nuisance_[0][0]._model_T_WZ._model,
                          WeightedLasso)

        # --- DMLIV (1): featurizer / intercept changes, nuisance swap checked via models_* ---
        est = DMLIV(model_Y_X=LinearRegression(),
                    model_T_X=LinearRegression(),
                    model_T_XZ=LinearRegression(),
                    model_final=LinearRegression(fit_intercept=False),
                    mc_iters=2)
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        np.testing.assert_equal(len(est.coef_), X.shape[1])
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        # NOTE(review): a degree-2 expansion without bias yields d + d*(d+1)/2
        # columns, which equals d**2 only for d == 3 -- confirm that
        # _get_data() returns an X with three columns.
        np.testing.assert_equal(len(est.coef_), X.shape[1]**2)
        # The intercept accesses below are expected to succeed; only after
        # the second refit_final() is the access asserted to raise.
        est.intercept_
        est.fit_cate_intercept = False
        est.intercept_
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        est.model_Y_X = Lasso()
        est.model_T_X = ElasticNet()
        est.model_T_XZ = WeightedLasso()
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        assert isinstance(est.models_Y_X[0][0], Lasso)
        assert isinstance(est.models_T_X[0][0], ElasticNet)
        assert isinstance(est.models_T_XZ[0][0], WeightedLasso)

        # --- DMLIV (2): identical scenario, nuisance swap checked via models_nuisance_ ---
        est = DMLIV(model_Y_X=LinearRegression(),
                    model_T_X=LinearRegression(),
                    model_T_XZ=LinearRegression(),
                    model_final=LinearRegression(fit_intercept=False),
                    mc_iters=2)
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        np.testing.assert_equal(len(est.coef_), X.shape[1])
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        np.testing.assert_equal(len(est.coef_), X.shape[1]**2)
        est.intercept_
        est.fit_cate_intercept = False
        est.intercept_
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        est.model_Y_X = Lasso()
        est.model_T_X = ElasticNet()
        est.model_T_XZ = WeightedLasso()
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        assert isinstance(est.models_nuisance_[0][0]._model_Y_X._model, Lasso)
        assert isinstance(est.models_nuisance_[0][0]._model_T_X._model,
                          ElasticNet)
        assert isinstance(est.models_nuisance_[0][0]._model_T_XZ._model,
                          WeightedLasso)

        # --- NonParamDMLIV: change featurizer and final model, refit final stage only ---
        est = NonParamDMLIV(model_Y_X=LinearRegression(),
                            model_T_X=LinearRegression(),
                            model_T_XZ=LinearRegression(),
                            model_final=LinearRegression(fit_intercept=True),
                            mc_iters=2)
        est.fit(y, T, X=X, Z=Z, cache_values=True)
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.model_final = WeightedLasso()
        est.refit_final()
        assert isinstance(est.model_cate, WeightedLasso)
        assert isinstance(est.featurizer_, PolynomialFeatures)

        # --- IntentToTreatDRIV: final model follows flexible_model_effect while model_final is None ---
        est = IntentToTreatDRIV(model_Y_X=LinearRegression(),
                                model_T_XZ=LogisticRegression(),
                                flexible_model_effect=LinearRegression())
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert est.model_final is None
        assert isinstance(est.model_final_, LinearRegression)
        est.flexible_model_effect = Lasso()
        est.refit_final()
        assert est.model_final is None
        assert isinstance(est.model_final_, Lasso)
        est.model_final = Lasso()
        est.refit_final()
        assert isinstance(est.model_final, Lasso)
        assert isinstance(est.model_final_, Lasso)
        # refit_final() alone leaves the nuisance-stage preliminary effect
        # model untouched: it is still the LinearRegression from the first fit.
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_,
            LinearRegression)
        # A full fit is what propagates the new flexible_model_effect to it.
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_, Lasso)

        # --- LinearIntentToTreatDRIV: intercept availability and read-only model_final ---
        est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                      model_T_XZ=LogisticRegression(),
                                      flexible_model_effect=LinearRegression())
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        est.fit_cate_intercept = False
        # These accesses are expected to still succeed here; only after
        # refit_final() are they asserted to raise AttributeError.
        est.intercept_
        est.intercept__interval()
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_
        with pytest.raises(AttributeError):
            est.intercept__interval()
        # Assigning model_final on this estimator is expected to be rejected.
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.flexible_model_effect = Lasso()
        est.fit(y, T, X=X, W=W, Z=Z, cache_values=True)
        assert isinstance(
            est.models_nuisance_[0][0]._prel_model_effect.model_final_, Lasso)