Code Example #1
    def test_discrete_treatments(self):
        """
        TODO Almost identical to DML test, so consider merging
        Test that we can use discrete treatments
        """
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                              featurizer=FunctionTransformer(validate=True))
        # create a simple artificial setup where the effect of moving from treatment
        #     1 -> 2 is 2,
        #     1 -> 3 is 1, and
        #     2 -> 3 is -1 (necessarily, by composing the previous two effects)
        # Using an uneven number of examples from different classes,
        # and having the treatments in non-lexicographic order,
        # should rule out some basic issues.
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                np.ones((8, 1)))
        np.testing.assert_almost_equal(
            dml.effect(np.ones((9, 1)),
                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])),
            [0, 2, 1, -2, 0, -1, -1, 1, 0])
        dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                  np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                  np.ones((8, 1)))
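To run the same pattern outside the test harness, here is a minimal self-contained sketch. It assumes the econml version these tests target (where LinearDRLearner is importable from econml.drlearner); the sample size and noise level are invented for illustration, and the arm effects mirror the comment above (1 -> 2 is 2, 1 -> 3 is 1):

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.drlearner import LinearDRLearner  # import path is an assumption

    rng = np.random.RandomState(0)
    n = 600
    X = rng.normal(size=(n, 1))
    T = rng.choice([1, 2, 3], size=n)  # discrete treatment arms
    # arm 1 is the baseline; arm 2 adds 2 to the outcome, arm 3 adds 1
    Y = np.where(T == 2, 2.0, np.where(T == 3, 1.0, 0.0)) + rng.normal(scale=0.1, size=n)

    est = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'))
    est.fit(Y, T, X)
    print(est.effect(X[:3], T0=1, T1=2))  # expect values close to 2
    print(est.effect(X[:3], T0=2, T1=3))  # expect values close to -1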
Code Example #2
    def test_can_custom_splitter(self):
        """
        TODO Almost identical to DML test, so consider merging
        """
        # test that we can fit with a KFold instance
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                              n_splits=KFold(n_splits=3))
        dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
        dml.score(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))

        # test that we can fit with a train/test iterable
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                              n_splits=[([0, 1, 2], [3, 4, 5])])
        dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
        dml.score(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
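What this test pins down is that the first-stage cross-fitting strategy is pluggable. A hedged sketch of the forms n_splits accepts in this econml version (later econml releases renamed this parameter to cv; the two-fold index list below is invented for illustration):

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from sklearn.model_selection import KFold
    from econml.drlearner import LinearDRLearner  # import path is an assumption

    # a scikit-learn splitter instance
    est = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'),
                          n_splits=KFold(n_splits=3))

    # an explicit iterable of (train_indices, test_indices) pairs; each fold's
    # nuisance models are trained on train and used to predict on test
    folds = [([0, 1, 2], [3, 4, 5]), ([3, 4, 5], [0, 1, 2])]
    est = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'),
                          n_splits=folds)
    est.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))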
Code Example #3
    def test_cate_api(self):
        """Test that we correctly implement the CATE API."""
        n = 20

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = (n, d) if d > 0 else (n, )
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every element
                    _, counts = np.unique(arr, return_counts=True)
                    if len(counts) == 3 and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        d_y = 0
        is_discrete = True
        for d_t in [0, 1]:
            for d_x in [2, None]:
                for d_w in [2, None]:
                    with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w):
                        W, X, Y, T = [make_random(is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]

                        if (X is None) and (W is None):
                            continue
                        d_t_final = 2 if is_discrete else d_t

                        effect_shape = (n, ) + ((d_y, ) if d_y > 0 else ())
                        marginal_effect_shape = ((n, ) +
                                                 ((d_y, ) if d_y > 0 else ()) +
                                                 ((d_t_final, ) if d_t_final > 0 else ()))

                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = ((n if d_x else 1, ) +
                                                       ((d_y, ) if d_y > 0 else ()) +
                                                       ((d_t_final, ) if d_t_final > 0 else ()))

                        # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                        infs = [None, 'statsmodels']

                        est = LinearDRLearner(
                            model_regression=Lasso(),
                            model_propensity=LogisticRegression(
                                C=1000, solver='lbfgs', multi_class='auto'))

                        for inf in infs:
                            with self.subTest(d_w=d_w,
                                              d_x=d_x,
                                              d_y=d_y,
                                              d_t=d_t,
                                              is_discrete=is_discrete,
                                              est=est,
                                              inf=inf):
                                est.fit(Y, T, X, W, inference=inf)
                                # make sure we can call the marginal_effect and effect methods
                                const_marg_eff = est.const_marginal_effect(X)
                                marg_eff = est.marginal_effect(T, X)
                                self.assertEqual(shape(marg_eff),
                                                 marginal_effect_shape)
                                self.assertEqual(shape(const_marg_eff),
                                                 const_marginal_effect_shape)

                                np.testing.assert_array_equal(
                                    marg_eff if d_x else marg_eff[0:1],
                                    const_marg_eff)

                                T0 = np.full_like(T, 'a')
                                eff = est.effect(X, T0=T0, T1=T)
                                self.assertEqual(shape(eff), effect_shape)
                                if inf is not None:
                                    const_marg_eff_int = est.const_marginal_effect_interval(X)
                                    marg_eff_int = est.marginal_effect_interval(T, X)
                                    self.assertEqual(shape(marg_eff_int),
                                                     (2, ) + marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff_int),
                                                     (2, ) + const_marginal_effect_shape)
                                    self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                     (2, ) + effect_shape)

                                est.score(Y, T, X, W)

                                # make sure we can call effect with implied scalar treatments, no matter the
                                # dimensions of T, and also that we warn when there are multiple treatments
                                if d_t > 1:
                                    cm = self.assertWarns(Warning)
                                else:
                                    # ExitStack can be used as a "do nothing" context manager
                                    cm = ExitStack()
                                with cm:
                                    effect_shape2 = ((n if d_x else 1, ) +
                                                     ((d_y, ) if d_y > 0 else ()))
                                    eff = est.effect(X, T0='a', T1='b')
                                    self.assertEqual(shape(eff), effect_shape2)
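The shape bookkeeping above is the heart of the CATE API contract: with a discrete treatment taking three values, the effect surfaces have one column per non-baseline arm, and const_marginal_effect collapses to a single row when X is None. A condensed sketch under the same assumptions (data invented; import path assumed as in this econml version):

    import numpy as np
    from sklearn.linear_model import Lasso, LogisticRegression
    from econml.drlearner import LinearDRLearner

    n = 200
    X = np.random.normal(size=(n, 2))
    T = np.random.choice(['a', 'b', 'c'], size=n)  # 3 arms -> 2 effect columns
    Y = np.random.normal(size=n)

    est = LinearDRLearner(model_regression=Lasso(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'))
    est.fit(Y, T, X, inference='statsmodels')
    print(est.const_marginal_effect(X).shape)        # (200, 2)
    print(est.marginal_effect(T, X).shape)           # (200, 2)
    print(est.effect(X, T0='a', T1='b').shape)       # (200,)
    lb, ub = est.effect_interval(X, T0='a', T1='b')  # two arrays, each (200,)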
Code Example #4
File: test_drlearner.py, Project: sijeong/EconML
    def test_linear_drlearner_all_attributes(self):
        from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
        from sklearn.linear_model import LinearRegression, LogisticRegression
        from econml.utilities import StatsModelsLinearRegression
        import scipy.special
        np.random.seed(123)
        controls = np.random.uniform(-1, 1, size=(5000, 3))
        T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
        sigma = 0.01
        y = (1 + .5 * controls[:, 0]) * T + controls[:, 0] + np.random.normal(
            0, sigma, size=(5000, ))
        for X in [None, controls]:
            for W in [None, controls]:
                for sample_weight, sample_var in [(None, None),
                                                  (np.ones(T.shape[0]),
                                                   np.zeros(T.shape[0]))]:
                    for featurizer in [
                            None,
                            PolynomialFeatures(degree=2, include_bias=False)
                    ]:
                        for models in [(GradientBoostingClassifier(),
                                        GradientBoostingRegressor()),
                                       (LogisticRegression(solver='lbfgs',
                                                           multi_class='auto'),
                                        LinearRegression())]:
                            for inference in [
                                    'statsmodels',
                                    StatsModelsInferenceDiscrete(
                                        cov_type='nonrobust')
                            ]:
                                with self.subTest(X=X,
                                                  W=W,
                                                  sample_weight=sample_weight,
                                                  sample_var=sample_var,
                                                  featurizer=featurizer,
                                                  models=models,
                                                  inference=inference):
                                    est = LinearDRLearner(
                                        model_propensity=models[0],
                                        model_regression=models[1],
                                        featurizer=featurizer)
                                    if (X is None) and (W is None):
                                        with pytest.raises(AttributeError) as e_info:
                                            est.fit(y, T, X=X, W=W,
                                                    sample_weight=sample_weight,
                                                    sample_var=sample_var)
                                        continue
                                    est.fit(y, T, X=X, W=W,
                                            sample_weight=sample_weight,
                                            sample_var=sample_var,
                                            inference=inference)
                                    if X is not None:
                                        lower, upper = est.effect_interval(X[:3], T0=0, T1=1)
                                        point = est.effect(X[:3], T0=0, T1=1)
                                        truth = 1 + .5 * X[:3, 0]
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        lower, upper = est.const_marginal_effect_interval(X[:3])
                                        point = est.const_marginal_effect(X[:3])
                                        truth = np.hstack([1 + .5 * X[:3, [0]],
                                                           2 * (1 + .5 * X[:3, [0]])])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    else:
                                        lower, upper = est.effect_interval(T0=0, T1=1)
                                        point = est.effect(T0=0, T1=1)
                                        truth = np.array([1])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        lower, upper = est.const_marginal_effect_interval()
                                        point = est.const_marginal_effect()
                                        truth = np.array([[1, 2]])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)

                                    for t in [1, 2]:
                                        if X is not None:
                                            lower, upper = est.marginal_effect_interval(t, X[:3])
                                            point = est.marginal_effect(t, X[:3])
                                            truth = np.hstack([1 + .5 * X[:3, [0]],
                                                               2 * (1 + .5 * X[:3, [0]])])
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        else:
                                            lower, upper = est.marginal_effect_interval(t)
                                            point = est.marginal_effect(t)
                                            truth = np.array([[1, 2]])
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    assert isinstance(est.score_, float)
                                    assert isinstance(est.score(y, T, X=X, W=W), float)

                                    if X is not None:
                                        feat_names = ['A', 'B', 'C']
                                    else:
                                        feat_names = []
                                    out_feat_names = feat_names
                                    if X is not None:
                                        if featurizer is not None:
                                            out_feat_names = featurizer.fit(X).get_feature_names(feat_names)
                                            np.testing.assert_array_equal(est.featurizer.n_input_features_, 3)
                                        np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                      out_feat_names)

                                    if isinstance(models[0], GradientBoostingClassifier):
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_regression]).shape,
                                            [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_propensity]).shape,
                                            [2, len(feat_names) + (W.shape[1] if W is not None else 0)])
                                    else:
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_
                                                      for mdl in est.models_regression]).shape,
                                            [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_
                                                      for mdl in est.models_propensity]).shape,
                                            [2, 3, len(feat_names) + (W.shape[1] if W is not None else 0)])

                                    if X is not None:
                                        for t in [1, 2]:
                                            true_coef = np.zeros(len(out_feat_names))
                                            true_coef[0] = .5 * t
                                            lower, upper = est.model_cate(T=t).coef__interval()
                                            point = est.model_cate(T=t).coef_
                                            truth = true_coef
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)

                                            lower, upper = est.coef__interval(t)
                                            point = est.coef_(t)
                                            truth = true_coef
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    for t in [1, 2]:
                                        lower, upper = est.model_cate(T=t).intercept__interval()
                                        point = est.model_cate(T=t).intercept_
                                        truth = t
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)

                                        lower, upper = est.intercept__interval(t)
                                        point = est.intercept_(t)
                                        truth = t
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
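Stripped of the parameter sweep, the attribute surface this test exercises reduces to a handful of calls. A minimal sketch under the same data-generating process (the true effect of arm t at covariates x is t * (1 + .5 * x[0]), so the fitted CATE model for arm t should have intercept near t and first coefficient near .5 * t; import path assumed as above):

    import numpy as np
    import scipy.special
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.drlearner import LinearDRLearner

    np.random.seed(123)
    X = np.random.uniform(-1, 1, size=(5000, 3))
    T = np.random.binomial(2, scipy.special.expit(X[:, 0]))  # arms 0, 1, 2
    y = (1 + .5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, .01, size=(5000,))

    est = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'))
    est.fit(y, T, X=X, inference='statsmodels')
    for t in [1, 2]:
        print(t, est.intercept_(t))        # close to t
        print(t, est.coef_(t))             # first entry close to .5 * t
        print(est.intercept__interval(t))  # confidence interval covering t
    print(est.cate_feature_names(['A', 'B', 'C']))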