Ejemplo n.º 1
0
 def transform(self, X):
     """
     Expand each row of X into a stack of matrix-valued features.

     A column of ones is prepended to X, and every entry of the augmented row is then
     multiplied with every matrix stored in ``self._fts``.

     Parameters
     ----------
     X : 2-d array with one row per sample

     Returns
     -------
     Array of shape (n, d_y * d_t * (d_x + 1), d_y, d_t), where n and d_x are the row and
     column counts of the incoming X, and d_y, d_t are read from ``self._d_y`` / ``self._d_t``.
     """
     n_rows = shape(X)[0]
     # prepend an intercept column so that the constant term gets featurized as well
     augmented = hstack([np.ones((n_rows, 1)), X])
     n_cols = shape(augmented)[1]
     d_y, d_t = self._d_y, self._d_t
     # pair every (row, column) entry with every stored feature matrix
     outer = np.einsum('nx,fyt->nfxyt', augmented, self._fts)
     # flatten the (feature, column) axes into one feature axis
     return reshape(outer, (n_rows, d_y * d_t * n_cols, d_y, d_t))
Ejemplo n.º 2
0
    def _generate_recoverable_errors(a_X,
                                     X,
                                     a_W=None,
                                     W=None,
                                     featurizer=None):
        """
        Return error vectors e_t and e_y such that OLS can recover the true coefficients from both stages.

        Parameters
        ----------
        a_X : coefficient vector multiplied with the (ϕ(X)⊗e_t) terms
        X : array of features, one row per sample
        a_W : optional coefficient vector multiplied with the (W⊗e_t) terms;
            defaults to a zero vector with one entry per column of W
        W : optional array of controls; defaults to an array with the same number of rows as X and no columns
        featurizer : optional transformer whose ``fit_transform`` is applied to X to obtain ϕ(X);
            defaults to a new ``FunctionTransformer()`` created on each call

        Returns
        -------
        (e_t, e_y) : the two error vectors
        """
        if featurizer is None:
            # create the default per call: a default built in the signature would be a single
            # instance shared (and re-fitted) across every invocation
            featurizer = FunctionTransformer()
        if W is None:
            W = np.empty((shape(X)[0], 0))
        if a_W is None:
            a_W = np.zeros((shape(W)[1], ))
        # to correctly recover coefficients for T via OLS, we need e_t to be orthogonal to [W;X]
        WX = hstack([W, X])
        e_t = rand_sol(WX.T, np.zeros((shape(WX)[1], )))

        # to correctly recover coefficients for Y via OLS, we need ([W; X]⊗[1; ϕ(X); W])⁺ e_y =
        #                                                          -([W; X]⊗[1; ϕ(X); W])⁺ ((ϕ(X)⊗e_t)a_X+(W⊗e_t)a_W)
        # then, to correctly recover a in the third stage, we additionally need (ϕ(X)⊗e_t)ᵀ e_y = 0

        ϕ = featurizer.fit_transform(X)

        v_X = cross_product(ϕ, e_t)
        v_W = cross_product(W, e_t)

        M = np.linalg.pinv(
            cross_product(WX, hstack([np.ones((shape(WX)[0], 1)), ϕ, W])))
        # solve both constraints at once by stacking them into a single linear system
        e_y = rand_sol(
            vstack([M, v_X.T]),
            vstack([-M @ (v_X @ a_X + v_W @ a_W),
                    np.zeros((shape(v_X)[1], ))]))

        return e_t, e_y
    def test_2sls_shape(self):
        """Check the shapes returned by effect and marginal_effect over a grid of input dimensions."""
        n = 100

        def make_random(d):
            # a negative dimension requests a 1-d vector rather than an (n, d) array
            if d < 0:
                return np.random.normal(size=(n,))
            return np.random.normal(size=(n, d))

        d_w = 1
        for d_t in [-1, 1, 2]:
            n_t = d_t if d_t > 0 else 1
            t_dims = (d_t,) if d_t > 0 else ()
            for d_y in [-1, 1, 2]:
                y_dims = (d_y,) if d_y > 0 else ()
                for d_x in [1, 5]:
                    for d_z in [1, 2]:
                        if d_z < n_t:
                            # only run when there are at least as many instrument columns as treatments
                            continue

                        T = make_random(d_t)
                        Y = make_random(d_y)
                        X = make_random(d_x)
                        Z = make_random(d_z)
                        W = make_random(d_w)

                        est = NonparametricTwoStageLeastSquares(
                            t_featurizer=PolynomialFeatures(),
                            x_featurizer=PolynomialFeatures(),
                            z_featurizer=PolynomialFeatures(),
                            dt_featurizer=DPolynomialFeatures())
                        est.fit(Y, T, X=X, W=W, Z=Z)

                        eff = est.effect(X)
                        marg_eff = est.marginal_effect(T, X)

                        expected_eff = (n,) + y_dims
                        expected_marg_eff = (n if d_x else 1,) + y_dims + t_dims

                        self.assertEqual(shape(marg_eff), expected_marg_eff)
                        self.assertEqual(shape(eff), expected_eff)
Ejemplo n.º 4
0
    def test_2sls(self):
        """Fit NonparametricTwoStageLeastSquares at several Hermite degrees; print losses and marginal effects."""
        n = 50000
        col = (n, 1)

        # training sample: the noise e enters x, p and y
        e = np.random.uniform(low=-0.5, high=0.5, size=col)
        z = np.random.uniform(size=col)
        x = np.random.uniform(size=col) + e
        p = x + z * e + np.random.uniform(size=col)
        y = p * x + e

        # fresh sample, generated the same way, used only for evaluation
        z_fresh = np.random.uniform(size=col)
        e_fresh = np.random.uniform(low=-0.5, high=0.5, size=col)
        x_fresh = np.random.uniform(size=col) + e_fresh
        p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=col)
        target = p_fresh * x_fresh

        losses, marg_effs = [], []
        degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10),
                       (3, 3, 10), (10, 10, 3)]
        for dt, dx, dz in degree_grid:
            np2sls = NonparametricTwoStageLeastSquares(
                HermiteFeatures(dt), HermiteFeatures(dx), HermiteFeatures(dz),
                HermiteFeatures(dt, shift=1))
            np2sls.fit(y, p, x, z)

            # effect of moving the treatment from zero to p_fresh, at features x_fresh
            baseline = np.zeros(shape(p_fresh))
            effect = np2sls.effect(x_fresh, baseline, p_fresh)
            losses.append(np.mean(np.square(target - effect)))

            t_points = np.array([[0.3], [0.5], [0.7]])
            x_points = np.array([[0.4], [0.6], [0.2]])
            marg_effs.append(np2sls.marginal_effect(t_points, x_points))

        print("losses: {}".format(losses))
        print("marg_effs: {}".format(marg_effs))
Ejemplo n.º 5
0
    def fit(self, X, y, sample_weight=None):
        """
        Fit the least squares model, optionally weighting the samples.

        The fit is delegated to ``WLS``; when ``self.fit_intercept`` is set, a constant
        column is added to X first.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array_like, shape (n_samples, 1) or (n_samples,)
            Target values
        sample_weight : array_like, shape (n_samples,)
            Individual weights for each sample; when omitted, no weights are passed to ``WLS``

        Returns
        -------
        self
        """
        assert (ndim(y) == 1
                or (ndim(y) == 2 and shape(y)[1] == 1))
        # the underlying model is always given a flat target vector
        target = reshape(y, (-1,))

        design = add_constant(X, has_constant='add') if self.fit_intercept else X

        # only forward the weights when the caller actually supplied them
        wls_kwargs = {'hasconst': self.fit_intercept}
        if sample_weight is not None:
            wls_kwargs['weights'] = sample_weight

        self.results = WLS(target, design, **wls_kwargs).fit(**self.fit_args)
        return self
Ejemplo n.º 6
0
 def test_hermite_shape(self):
     """Check the output shape of HermiteFeatures across degrees, shifts, joint settings and input sizes."""
     for degree, shift in [(3, 0), (4, 2)]:
         for joint in [True, False]:
             for n_rows, n_cols in [(5, 1), (7, 3)]:
                 if joint:
                     last_dim = (degree + 1)**n_cols
                 else:
                     last_dim = (degree + 1) * n_cols
                 # one extra axis of length n_cols is expected per unit of shift
                 expected = (n_rows,) + (n_cols,) * shift + (last_dim,)
                 features = HermiteFeatures(degree, shift, joint).fit_transform(np.zeros((n_rows, n_cols)))
                 assert shape(features) == expected
Ejemplo n.º 7
0
 def fit(self, X, y, sample_weight=None):
     """
     Fit a cross-validated weighted lasso, choosing the model from the shape of y.

     A 2-d y with more than one column uses ``WeightedMultiTaskLassoCV``; anything else uses
     ``WeightedLassoCV``. A 2-d y with exactly one column is flattened first, and
     ``self.needs_unravel`` records that this happened.

     Parameters
     ----------
     X : training data, passed through to the underlying model
     y : target values
     sample_weight : optional sample weights, passed through to the underlying model

     Returns
     -------
     self
     """
     self.needs_unravel = False
     is_2d = ndim(y) == 2
     if is_2d and shape(y)[1] > 1:
         model_cls = WeightedMultiTaskLassoCV
     else:
         model_cls = WeightedLassoCV
         if is_2d and shape(y)[1] == 1:
             y = np.ravel(y)
             self.needs_unravel = True
     self.model = model_cls(*self.args, **self.kwargs)
     self.model.fit(X, y, sample_weight)
     # mirror the fitted attributes of the wrapped model on this object
     for fitted_attr in ('intercept_', 'coef_', 'alpha_', 'alphas_', 'n_iter_'):
         setattr(self, fitted_attr, getattr(self.model, fitted_attr))
     return self
Ejemplo n.º 8
0
    def test_hermite_results(self):
        """Compare HermiteFeatures output against closed-form Hermite functions and their first derivatives."""
        inputs = np.random.normal(size=(5, 1))
        ones = np.ones(shape(inputs))

        # first polynomials are 1, x, x*x-1, x*x*x-3*x
        expected_polys = np.hstack([ones,
                                    inputs,
                                    inputs * inputs - ones,
                                    inputs * inputs * inputs - 3 * inputs])
        features = HermiteFeatures(3).fit_transform(inputs)
        assert(np.allclose(features, expected_polys * np.exp(-inputs * inputs / 2)))

        # first derivatives are -x, -x^2+1 (since there's just one column, joint-ness doesn't matter)
        for joint in [True, False]:
            derivs = HermiteFeatures(1, shift=1, joint=joint).fit_transform(inputs)
            expected_derivs = np.hstack([-inputs, -inputs * inputs + ones])
            expected = reshape(expected_derivs * np.exp(-inputs * inputs / 2), (5, 1, 2))
            assert(np.allclose(derivs, expected))
    def test_2sls(self):
        """Fit SieveTSLS with Hermite features of several degrees on simulated data and print the results."""
        n = 50000
        d_w = 2
        d_z = 1
        d_x = 1
        d_t = 1
        d_y = 1
        uniform = np.random.uniform

        # training sample: the noise e enters x, p and y; controls w shift both p and y
        e = uniform(low=-0.5, high=0.5, size=(n, d_x))
        z = uniform(size=(n, d_z))
        w = uniform(size=(n, d_w))
        a = np.random.normal(size=(d_w, d_t))
        b = np.random.normal(size=(d_w, d_y))
        x = uniform(size=(n, d_x)) + e
        p = x + z * e + w @ a + uniform(size=(n, d_t))
        y = p * x + e + w @ b

        # fresh sample used only for evaluation
        z_fresh = uniform(size=(n, d_z))
        e_fresh = uniform(low=-0.5, high=0.5, size=(n, d_x))
        x_fresh = uniform(size=(n, d_x)) + e_fresh
        # NOTE(review): w_fresh is drawn but never used below
        w_fresh = uniform(size=(n, d_w))
        p_fresh = x_fresh + z_fresh * e_fresh + uniform(size=(n, d_t))
        target = p_fresh * x_fresh

        losses, marg_effs = [], []
        degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10),
                       (3, 3, 10), (10, 10, 3)]
        for dt, dx, dz in degree_grid:
            np2sls = SieveTSLS(t_featurizer=HermiteFeatures(dt),
                               x_featurizer=HermiteFeatures(dx),
                               z_featurizer=HermiteFeatures(dz),
                               dt_featurizer=HermiteFeatures(dt, shift=1))
            np2sls.fit(y, p, X=x, W=w, Z=z)

            # effect of moving the treatment from zero to p_fresh, at features x_fresh
            baseline = np.zeros(shape(p_fresh))
            effect = np2sls.effect(x_fresh, baseline, p_fresh)
            losses.append(np.mean(np.square(target - effect)))

            t_points = np.array([[0.3], [0.5], [0.7]])
            x_points = np.array([[0.4], [0.6], [0.2]])
            marg_effs.append(np2sls.marginal_effect(t_points, x_points))

        print("losses: {}".format(losses))
        print("marg_effs: {}".format(marg_effs))
Ejemplo n.º 10
0
    def test_cate_api(self):
        """
        Test that we correctly implement the CATE API.

        Iterates over combinations of treatment, outcome, feature and control dimensions (with
        discrete and continuous treatments), featurizers and inference methods; fits a DynamicDML
        estimator on panel data for each combination and checks the shapes of the effects,
        coefficients, intervals and inference results it returns.
        """
        n_panels = 100  # number of panels
        n_periods = 3  # number of time periods per panel
        n = n_panels * n_periods
        # one group id per panel, repeated once for each of its periods
        groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0)

        def make_random(n, is_discrete, d):
            # d is None -> no array at all; negative d -> 1-d vector; otherwise an (n, d) array
            if d is None:
                return None
            sz = (n, d) if d >= 0 else (n,)
            if is_discrete:
                # discrete data is drawn from three string categories
                return np.random.choice(['a', 'b', 'c'], size=sz)
            else:
                return np.random.normal(size=sz)

        for d_t in [2, 1, -1]:
            # discrete treatments are only exercised when the treatment has at most one column
            for is_discrete in [True, False] if d_t <= 1 else [False]:
                # for is_discrete in [False]:
                for d_y in [3, 1, -1]:
                    for d_x in [2, None]:
                        for d_w in [2, None]:
                            # W, X and Y are always continuous; only T may be discrete
                            W, X, Y, T = [make_random(n, is_discrete, d)
                                          for is_discrete, d in [(False, d_w),
                                                                 (False, d_x),
                                                                 (False, d_y),
                                                                 (is_discrete, d_t)]]
                            # repeat the treatment columns once per period (a vector T is first made a column)
                            T_test = np.hstack([(T.reshape(-1, 1) if d_t == -1 else T) for i in range(n_periods)])
                            for featurizer, fit_cate_intercept in\
                                [(None, True),
                                 (PolynomialFeatures(degree=2, include_bias=False), True),
                                 (PolynomialFeatures(degree=2, include_bias=True), False)]:

                                # expected size of the treatment axis in the results: 2 per period for a
                                # discrete treatment, otherwise the number of treatment columns per period
                                d_t_final = (2 if is_discrete else max(d_t, 1)) * n_periods

                                effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                                # summary frames are expected to have one row per result entry and 6 columns
                                effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1), 6)
                                marginal_effect_shape = ((n,) +
                                                         ((d_y,) if d_y > 0 else ()) +
                                                         ((d_t_final,) if d_t_final > 0 else ()))
                                marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1) *
                                                                      (d_t_final if d_t_final > 0 else 1), 6)

                                # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                                const_marginal_effect_shape = ((n if d_x else 1,) +
                                                               ((d_y,) if d_y > 0 else ()) +
                                                               ((d_t_final,) if d_t_final > 0 else()))
                                const_marginal_effect_summaryframe_shape = (
                                    (n if d_x else 1) * (d_y if d_y > 0 else 1) *
                                    (d_t_final if d_t_final > 0 else 1), 6)

                                # number of (featurized) X columns; (0,) when there is no X
                                fd_x = featurizer.fit_transform(X).shape[1:] if featurizer and d_x\
                                    else ((d_x,) if d_x else (0,))
                                coef_shape = Y.shape[1:] + (d_t_final, ) + fd_x

                                coef_summaryframe_shape = (
                                    (d_y if d_y > 0 else 1) * (fd_x[0] if fd_x[0] >
                                                               0 else 1) * (d_t_final), 6)
                                intercept_shape = Y.shape[1:] + (d_t_final, )
                                intercept_summaryframe_shape = (
                                    (d_y if d_y > 0 else 1) * (d_t_final if d_t_final > 0 else 1), 6)

                                all_infs = [None, 'auto', BootstrapInference(2)]
                                # pick single- or multi-task nuisance models to match the Y and T dimensions
                                est = DynamicDML(model_y=Lasso() if d_y < 1 else MultiTaskLasso(),
                                                 model_t=LogisticRegression() if is_discrete else
                                                 (Lasso() if d_t < 1 else MultiTaskLasso()),
                                                 featurizer=featurizer,
                                                 fit_cate_intercept=fit_cate_intercept,
                                                 discrete_treatment=is_discrete)

                                # ensure we can serialize the unfit estimator
                                pickle.dumps(est)

                                for inf in all_infs:
                                    with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                      is_discrete=is_discrete, est=est, inf=inf):

                                        # with neither X nor a CATE intercept, fitting is expected to fail
                                        if X is None and (not fit_cate_intercept):
                                            with pytest.raises(AttributeError):
                                                est.fit(Y, T, X=X, W=W, groups=groups, inference=inf)
                                            continue

                                        est.fit(Y, T, X=X, W=W, groups=groups, inference=inf)

                                        # ensure we can pickle the fit estimator
                                        pickle.dumps(est)

                                        # make sure we can call the marginal_effect and effect methods
                                        const_marg_eff = est.const_marginal_effect(X)
                                        marg_eff = est.marginal_effect(T_test, X)
                                        self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                        # without X the constant marginal effect has a single row, so compare
                                        # against only the first row of the marginal effect
                                        np.testing.assert_allclose(
                                            marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                        # scores are expected to be reported per period
                                        assert len(est.score_) == n_periods
                                        for score in est.nuisance_scores_y[0]:
                                            assert score.shape == (n_periods, )
                                        for score in est.nuisance_scores_t[0]:
                                            assert score.shape == (n_periods, n_periods)

                                        # baseline treatment: category 'a' when discrete, zero otherwise
                                        T0 = np.full_like(T_test, 'a') if is_discrete else np.zeros_like(T_test)
                                        eff = est.effect(X, T0=T0, T1=T_test)
                                        self.assertEqual(shape(eff), effect_shape)

                                        self.assertEqual(shape(est.coef_), coef_shape)
                                        if fit_cate_intercept:
                                            self.assertEqual(shape(est.intercept_), intercept_shape)
                                        else:
                                            # accessing the intercept must raise when none was fit
                                            with pytest.raises(AttributeError):
                                                self.assertEqual(shape(est.intercept_), intercept_shape)

                                        if inf is not None:
                                            # intervals carry a leading axis of length 2 in front of the point shape
                                            const_marg_eff_int = est.const_marginal_effect_interval(X)
                                            marg_eff_int = est.marginal_effect_interval(T_test, X)
                                            self.assertEqual(shape(marg_eff_int),
                                                             (2,) + marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_eff_int),
                                                             (2,) + const_marginal_effect_shape)
                                            self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T_test)),
                                                             (2,) + effect_shape)
                                            self.assertEqual(shape(est.coef__interval()),
                                                             (2,) + coef_shape)
                                            if fit_cate_intercept:
                                                self.assertEqual(shape(est.intercept__interval()),
                                                                 (2,) + intercept_shape)
                                            else:
                                                with pytest.raises(AttributeError):
                                                    self.assertEqual(shape(est.intercept__interval()),
                                                                     (2,) + intercept_shape)

                                            const_marg_effect_inf = est.const_marginal_effect_inference(X)
                                            # target treatment: category 'b' when discrete, T_test otherwise
                                            T1 = np.full_like(T_test, 'b') if is_discrete else T_test
                                            effect_inf = est.effect_inference(X, T0=T0, T1=T1)
                                            marg_effect_inf = est.marginal_effect_inference(T_test, X)
                                            # test const marginal inference
                                            self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                             const_marginal_effect_summaryframe_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                             const_marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                             const_marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.var),
                                                             const_marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                             const_marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                             const_marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                             (2,) + const_marginal_effect_shape)
                                            # the inference object's lower bound must agree with the interval method
                                            np.testing.assert_array_almost_equal(
                                                const_marg_effect_inf.conf_int()[0],
                                                const_marg_eff_int[0], decimal=5)
                                            # only checks that rendering the summary does not raise
                                            const_marg_effect_inf.population_summary()._repr_html_()

                                            # test effect inference
                                            self.assertEqual(shape(effect_inf.summary_frame()),
                                                             effect_summaryframe_shape)
                                            self.assertEqual(shape(effect_inf.point_estimate),
                                                             effect_shape)
                                            self.assertEqual(shape(effect_inf.stderr),
                                                             effect_shape)
                                            self.assertEqual(shape(effect_inf.var),
                                                             effect_shape)
                                            self.assertEqual(shape(effect_inf.pvalue()),
                                                             effect_shape)
                                            self.assertEqual(shape(effect_inf.zstat()),
                                                             effect_shape)
                                            self.assertEqual(shape(effect_inf.conf_int()),
                                                             (2,) + effect_shape)
                                            np.testing.assert_array_almost_equal(
                                                effect_inf.conf_int()[0],
                                                est.effect_interval(X, T0=T0, T1=T1)[0], decimal=5)
                                            effect_inf.population_summary()._repr_html_()

                                            # test marginal effect inference
                                            self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                             marginal_effect_summaryframe_shape)
                                            self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                             marginal_effect_shape)
                                            self.assertEqual(shape(marg_effect_inf.stderr),
                                                             marginal_effect_shape)
                                            self.assertEqual(shape(marg_effect_inf.var),
                                                             marginal_effect_shape)
                                            self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                             marginal_effect_shape)
                                            self.assertEqual(shape(marg_effect_inf.zstat()),
                                                             marginal_effect_shape)
                                            self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                             (2,) + marginal_effect_shape)
                                            np.testing.assert_array_almost_equal(
                                                marg_effect_inf.conf_int()[0], marg_eff_int[0], decimal=5)
                                            marg_effect_inf.population_summary()._repr_html_()

                                            # test coef__inference and intercept__inference
                                            if X is not None:
                                                self.assertEqual(
                                                    shape(est.coef__inference().summary_frame()),
                                                    coef_summaryframe_shape)
                                                np.testing.assert_array_almost_equal(
                                                    est.coef__inference().conf_int()
                                                    [0], est.coef__interval()[0], decimal=5)

                                            if fit_cate_intercept:
                                                cm = ExitStack()
                                                # ExitStack can be used as a "do nothing" ContextManager
                                            else:
                                                cm = pytest.raises(AttributeError)
                                            with cm:
                                                self.assertEqual(shape(est.intercept__inference().
                                                                       summary_frame()),
                                                                 intercept_summaryframe_shape)
                                                np.testing.assert_array_almost_equal(
                                                    est.intercept__inference().conf_int()
                                                    [0], est.intercept__interval()[0], decimal=5)

                                            # only checks that building the summary does not raise
                                            est.summary()
                                        # only checks that scoring does not raise
                                        est.score(Y, T, X, W, groups=groups)
                                        # make sure we can call effect with implied scalar treatments,
                                        # no matter the dimensions of T, and also that we warn when there
                                        # are multiple treatments
                                        if d_t > 1:
                                            cm = self.assertWarns(Warning)
                                        else:
                                            # ExitStack can be used as a "do nothing" ContextManager
                                            cm = ExitStack()
                                        with cm:
                                            # without explicit treatments the effect has a single row when X is None
                                            effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
                                            eff = est.effect(X) if not is_discrete else est.effect(
                                                X, T0='a', T1='b')
                                            self.assertEqual(shape(eff), effect_shape2)
Ejemplo n.º 11
0
    def test_cate_api(self):
        """Test that we correctly implement the CATE API."""
        n = 30

        def size(n, d):
            return (n, d) if d >= 0 else (n, )

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = size(n, d)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every row
                    _, counts = np.unique(arr, return_counts=True, axis=0)
                    if len(counts) == 3**(d if d > 0 else
                                          1) and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        def eff_shape(n, d_y):
            return (n, ) + ((d_y, ) if d_y > 0 else ())

        def marg_eff_shape(n, d_y, d_t_final):
            return ((n, ) + ((d_y, ) if d_y > 0 else
                             ()) + ((d_t_final, ) if d_t_final > 0 else ()))

        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
        def const_marg_eff_shape(n, d_x, d_y, d_t_final):
            return ((n if d_x else 1, ) + ((d_y, ) if d_y > 0 else ()) +
                    ((d_t_final, ) if d_t_final > 0 else ()))

        for d_t in [2, 1, -1]:
            n_t = d_t if d_t > 0 else 1
            for discrete_t in [True, False] if n_t == 1 else [False]:
                for d_y in [3, 1, -1]:
                    for d_q in [2, None]:
                        for d_z in [2, 1]:
                            if d_z < n_t:
                                continue
                            for discrete_z in [True, False
                                               ] if d_z == 1 else [False]:
                                Z1, Q, Y, T1 = [
                                    make_random(is_discrete, d)
                                    for is_discrete, d in [(
                                        discrete_z,
                                        d_z), (False,
                                               d_q), (False,
                                                      d_y), (discrete_t, d_t)]
                                ]
                                if discrete_t and discrete_z:
                                    # need to make sure we get all *joint* combinations
                                    arr = make_random(True, 2)
                                    Z1 = arr[:, 0].reshape(size(n, d_z))
                                    T1 = arr[:, 0].reshape(size(n, d_t))

                                d_t_final1 = 2 if discrete_t else d_t

                                if discrete_t:
                                    # IntentToTreat only supports binary treatments/instruments
                                    T2 = T1.copy()
                                    T2[T1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(T1 == 'c'))
                                    d_t_final2 = 1
                                if discrete_z:
                                    # IntentToTreat only supports binary treatments/instruments
                                    Z2 = Z1.copy()
                                    Z2[Z1 == 'c'] = np.random.choice(
                                        ['a', 'b'],
                                        size=np.count_nonzero(Z1 == 'c'))

                                effect_shape = eff_shape(n, d_y)

                                model_t = LogisticRegression(
                                ) if discrete_t else Lasso()
                                model_z = LogisticRegression(
                                ) if discrete_z else Lasso()

                                all_infs = [None, BootstrapInference(1)]

                                estimators = [
                                    (DMLATEIV(model_Y_W=Lasso(),
                                              model_T_W=model_t,
                                              model_Z_W=model_z,
                                              discrete_treatment=discrete_t,
                                              discrete_instrument=discrete_z),
                                     True, all_infs),
                                    (ProjectedDMLATEIV(
                                        model_Y_W=Lasso(),
                                        model_T_W=model_t,
                                        model_T_WZ=model_t,
                                        discrete_treatment=discrete_t,
                                        discrete_instrument=discrete_z), False,
                                     all_infs),
                                    (DMLIV(model_Y_X=Lasso(),
                                           model_T_X=model_t,
                                           model_T_XZ=model_t,
                                           model_final=Lasso(),
                                           discrete_treatment=discrete_t,
                                           discrete_instrument=discrete_z),
                                     False, all_infs)
                                ]

                                if d_q and discrete_t and discrete_z:
                                    # IntentToTreat requires X
                                    estimators.append((LinearIntentToTreatDRIV(
                                        model_Y_X=Lasso(),
                                        model_T_XZ=model_t,
                                        flexible_model_effect=WeightedLasso(),
                                        cv=2), False, all_infs + ['auto']))

                                for est, multi, infs in estimators:
                                    if not (
                                            multi
                                    ) and d_y > 1 or d_t > 1 or d_z > 1:
                                        continue

                                    # ensure we can serialize unfit estimator
                                    pickle.dumps(est)

                                    d_ws = [None]
                                    if isinstance(est,
                                                  LinearIntentToTreatDRIV):
                                        d_ws.append(2)

                                    for d_w in d_ws:
                                        W = make_random(False, d_w)

                                        for inf in infs:
                                            with self.subTest(
                                                    d_z=d_z,
                                                    d_x=d_q,
                                                    d_y=d_y,
                                                    d_t=d_t,
                                                    discrete_t=discrete_t,
                                                    discrete_z=discrete_z,
                                                    est=est,
                                                    inf=inf):
                                                Z = Z1
                                                T = T1
                                                d_t_final = d_t_final1
                                                X = Q
                                                d_x = d_q

                                                if isinstance(
                                                        est,
                                                    (DMLATEIV,
                                                     ProjectedDMLATEIV)):
                                                    # these support only W but not X
                                                    W = Q
                                                    X = None
                                                    d_x = None

                                                    def fit():
                                                        return est.fit(
                                                            Y,
                                                            T,
                                                            Z=Z,
                                                            W=W,
                                                            inference=inf)

                                                    def score():
                                                        return est.score(Y,
                                                                         T,
                                                                         Z=Z,
                                                                         W=W)
                                                else:
                                                    # these support only binary, not general discrete T and Z
                                                    if discrete_t:
                                                        T = T2
                                                        d_t_final = d_t_final2

                                                    if discrete_z:
                                                        Z = Z2

                                                    if isinstance(
                                                            est,
                                                            LinearIntentToTreatDRIV
                                                    ):

                                                        def fit():
                                                            return est.fit(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                W=W,
                                                                inference=inf)

                                                        def score():
                                                            return est.score(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                W=W)
                                                    else:

                                                        def fit():
                                                            return est.fit(
                                                                Y,
                                                                T,
                                                                Z=Z,
                                                                X=X,
                                                                inference=inf)

                                                        def score():
                                                            return est.score(
                                                                Y, T, Z=Z, X=X)

                                                marginal_effect_shape = marg_eff_shape(
                                                    n, d_y, d_t_final)
                                                const_marginal_effect_shape = const_marg_eff_shape(
                                                    n, d_x, d_y, d_t_final)

                                                fit()

                                                # ensure we can serialize fit estimator
                                                pickle.dumps(est)

                                                # make sure we can call the marginal_effect and effect methods
                                                const_marg_eff = est.const_marginal_effect(
                                                    X)
                                                marg_eff = est.marginal_effect(
                                                    T, X)
                                                self.assertEqual(
                                                    shape(marg_eff),
                                                    marginal_effect_shape)
                                                self.assertEqual(
                                                    shape(const_marg_eff),
                                                    const_marginal_effect_shape
                                                )

                                                np.testing.assert_array_equal(
                                                    marg_eff
                                                    if d_x else marg_eff[0:1],
                                                    const_marg_eff)

                                                T0 = np.full_like(
                                                    T, 'a'
                                                ) if discrete_t else np.zeros_like(
                                                    T)
                                                eff = est.effect(X,
                                                                 T0=T0,
                                                                 T1=T)
                                                self.assertEqual(
                                                    shape(eff), effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                if inf is not None:
                                                    const_marg_eff_int = est.const_marginal_effect_interval(
                                                        X)
                                                    marg_eff_int = est.marginal_effect_interval(
                                                        T, X)
                                                    self.assertEqual(
                                                        shape(marg_eff_int),
                                                        (2, ) +
                                                        marginal_effect_shape)
                                                    self.assertEqual(
                                                        shape(
                                                            const_marg_eff_int
                                                        ), (2, ) +
                                                        const_marginal_effect_shape
                                                    )
                                                    self.assertEqual(
                                                        shape(
                                                            est.
                                                            effect_interval(
                                                                X, T0=T0,
                                                                T1=T)),
                                                        (2, ) + effect_shape)

                                                # TODO: add tests for extra properties like coef_ where they exist

                                                score()

                                                # make sure we can call effect with implied scalar treatments,
                                                # no matter the dimensions of T, and also that we warn when there
                                                # are multiple treatments
                                                if d_t > 1:
                                                    cm = self.assertWarns(
                                                        Warning)
                                                else:
                                                    # ExitStack can be used as a "do nothing" ContextManager
                                                    cm = ExitStack()
                                                with cm:
                                                    effect_shape2 = (
                                                        n if d_x else 1, ) + (
                                                            (d_y, )
                                                            if d_y > 0 else ())
                                                    eff = est.effect(
                                                        X
                                                    ) if not discrete_t else est.effect(
                                                        X, T0='a', T1='b')
                                                    self.assertEqual(
                                                        shape(eff),
                                                        effect_shape2)
Ejemplo n.º 12
0
    def test_cate_api(self):
        """Smoke-test the CATE API of the IV estimators over a grid of inputs.

        The grid covers a 1-D vs. 3-column outcome, presence/absence of W and
        X, binary vs. continuous instrument and treatment, and an optional
        degree-2 polynomial featurizer.  For every combination each estimator
        is pickled (before and after fitting), fit, and its effect methods are
        checked for the expected output shapes.
        """

        def const_marg_eff_shape(n, d_x, d_y, binary_T):
            # One row when there is no X; an outcome axis only when d_y > 1;
            # a trailing treatment axis of size 1 only for a binary treatment.
            return (n if d_x else 1, ) + ((d_y, ) if d_y > 1 else
                                          ()) + ((1, ) if binary_T else ())

        def marg_eff_shape(n, d_y, binary_T):
            # T is passed to marginal_effect, so there are always n rows.
            return (n, ) + ((d_y, ) if d_y > 1 else
                            ()) + ((1, ) if binary_T else ())

        def eff_shape(n, d_x, d_y):
            # Effects between two scalar treatments carry no treatment axis.
            return (n if d_x else 1, ) + ((d_y, ) if d_y > 1 else ())

        n = 1000

        for d_y in [1, 3]:
            if d_y == 1:
                y = np.random.normal(size=(n, ))
            else:
                y = np.random.normal(size=(n, d_y))
            for d_w in [None, 10]:
                if d_w is None:
                    W = None
                else:
                    W = np.random.normal(size=(n, d_w))
                for d_x in [None, 3]:
                    if d_x is None:
                        X = None
                    else:
                        X = np.random.normal(size=(n, d_x))
                    for binary_Z in [True, False]:
                        if binary_Z:
                            Z = np.random.choice([3, 4], size=(n, ))
                        else:
                            Z = np.random.normal(1, 3, size=(n, ))
                        for binary_T in [True, False]:
                            if binary_T:
                                T = np.random.choice([0, 1], size=(n, ))
                            else:
                                # continuous treatment that depends on Z
                                T = np.random.uniform(1, 3,
                                                      size=(n, )) + 0.5 * Z
                            for featurizer in [
                                    None,
                                    PolynomialFeatures(degree=2,
                                                       include_bias=False),
                            ]:
                                est_list = [
                                    OrthoIV(
                                        projection=False,
                                        featurizer=featurizer,
                                        discrete_treatment=binary_T,
                                        discrete_instrument=binary_Z,
                                    ),
                                    OrthoIV(
                                        projection=True,
                                        featurizer=featurizer,
                                        discrete_treatment=binary_T,
                                        discrete_instrument=binary_Z,
                                    ),
                                    DMLIV(
                                        model_final=LinearRegression(
                                            fit_intercept=False),
                                        featurizer=featurizer,
                                        discrete_treatment=binary_T,
                                        discrete_instrument=binary_Z,
                                    ),
                                    NonParamDMLIV(
                                        model_final=RandomForestRegressor(),
                                        featurizer=featurizer,
                                        discrete_treatment=binary_T,
                                        discrete_instrument=binary_Z,
                                    ),
                                ]

                                if X is None:
                                    # the last entry (NonParamDMLIV) is not
                                    # exercised without X
                                    est_list = est_list[:-1]

                                for est in est_list:
                                    # d_y is part of the sub-test id so that
                                    # failures for the two outcome layouts
                                    # can be told apart
                                    with self.subTest(d_y=d_y,
                                                      d_w=d_w,
                                                      d_x=d_x,
                                                      binary_T=binary_T,
                                                      binary_Z=binary_Z,
                                                      featurizer=featurizer,
                                                      est=est):

                                        # ensure we can serialize unfit estimator
                                        pickle.dumps(est)

                                        est.fit(y, T, Z=Z, X=X, W=W)

                                        # ensure we can serialize fit estimator
                                        pickle.dumps(est)

                                        # expected effect size
                                        const_marginal_effect_shape = const_marg_eff_shape(
                                            n, d_x, d_y, binary_T)
                                        marginal_effect_shape = marg_eff_shape(
                                            n, d_y, binary_T)
                                        effect_shape = eff_shape(n, d_x, d_y)
                                        # test effect
                                        const_marg_eff = est.const_marginal_effect(
                                            X)
                                        self.assertEqual(
                                            shape(const_marg_eff),
                                            const_marginal_effect_shape)
                                        marg_eff = est.marginal_effect(T, X)
                                        self.assertEqual(
                                            shape(marg_eff),
                                            marginal_effect_shape)
                                        eff = est.effect(X, T0=0, T1=1)
                                        self.assertEqual(
                                            shape(eff), effect_shape)

                                        # test inference
                                        # only OrthoIV support inference other than bootstrap
                                        if isinstance(est, OrthoIV):
                                            const_marg_eff_int = est.const_marginal_effect_interval(
                                                X)
                                            marg_eff_int = est.marginal_effect_interval(
                                                T, X)
                                            eff_int = est.effect_interval(X,
                                                                          T0=0,
                                                                          T1=1)
                                            # intervals stack (lower, upper)
                                            # in front of the point shape
                                            self.assertEqual(
                                                shape(const_marg_eff_int),
                                                (2, ) +
                                                const_marginal_effect_shape)
                                            self.assertEqual(
                                                shape(marg_eff_int),
                                                (2, ) + marginal_effect_shape)
                                            self.assertEqual(
                                                shape(eff_int),
                                                (2, ) + effect_shape)

                                        # test summary
                                        if isinstance(est, (OrthoIV, DMLIV)):
                                            est.summary()

                                        # test can run score
                                        est.score(y, T, Z, X=X, W=W)

                                        if X is not None:
                                            # test cate_feature_names
                                            if featurizer is not None:
                                                expect_feat_len = featurizer.fit(
                                                    X).n_output_features_
                                            else:
                                                expect_feat_len = d_x
                                            self.assertEqual(
                                                len(est.cate_feature_names()),
                                                expect_feat_len)

                                            # test can run shap values
                                            est.shap_values(X[:10])
Ejemplo n.º 13
0
    def test_cate_api(self):
        """Check that the DML estimators honour the CATE API for every input layout."""
        n = 20

        def make_random(is_discrete, d):
            # A dimension of None means the corresponding input is absent.
            if d is None:
                return None
            # A negative dimension requests a vector instead of an (n, d) matrix.
            sz = (n, ) if d < 0 else (n, d)
            if not is_discrete:
                return np.random.normal(size=sz)
            # Resample until all three categories occur at least twice.
            while True:
                arr = np.random.choice(['a', 'b', 'c'], size=sz)
                _, counts = np.unique(arr, return_counts=True)
                if len(counts) == 3 and counts.min() > 1:
                    return arr

        for d_t in [2, 1, -1]:
            # discrete treatments are only exercised for a single treatment
            discrete_options = [True, False] if d_t <= 1 else [False]
            for is_discrete in discrete_options:
                for d_y in [3, 1, -1]:
                    for d_x in [2, None]:
                        for d_w in [2, None]:
                            W = make_random(False, d_w)
                            X = make_random(False, d_x)
                            Y = make_random(False, d_y)
                            T = make_random(is_discrete, d_t)

                            d_t_final = 2 if is_discrete else d_t

                            # pieces shared by the expected shapes below
                            y_dims = (d_y, ) if d_y > 0 else ()
                            t_dims = (d_t_final, ) if d_t_final > 0 else ()
                            # without T as an argument, results collapse to
                            # a single row when X is None
                            rows_without_t = n if d_x else 1

                            effect_shape = (n, ) + y_dims
                            marginal_effect_shape = (n, ) + y_dims + t_dims
                            const_marginal_effect_shape = (
                                (rows_without_t, ) + y_dims + t_dims)
                            effect_shape2 = (rows_without_t, ) + y_dims

                            if is_discrete:
                                model_t = LogisticRegression()
                            else:
                                model_t = Lasso()

                            # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                            all_infs = [None, 'statsmodels']
                            if not is_discrete:
                                all_infs.append(BootstrapInference(1))

                            # (estimator, supports multiple outcomes, inference methods)
                            candidates = [
                                (LinearDMLCateEstimator(
                                    model_y=Lasso(),
                                    model_t='auto',
                                    discrete_treatment=is_discrete),
                                 False,
                                 all_infs),
                                (SparseLinearDMLCateEstimator(
                                    model_y=LinearRegression(),
                                    model_t=model_t,
                                    discrete_treatment=is_discrete),
                                 True,
                                 [None]),
                                (KernelDMLCateEstimator(
                                    model_y=LinearRegression(),
                                    model_t=model_t,
                                    discrete_treatment=is_discrete),
                                 False,
                                 [None]),
                            ]

                            for est, multi, infs in candidates:
                                if d_y > 1 and not multi:
                                    continue

                                for inf in infs:
                                    with self.subTest(d_w=d_w,
                                                      d_x=d_x,
                                                      d_y=d_y,
                                                      d_t=d_t,
                                                      is_discrete=is_discrete,
                                                      est=est,
                                                      inf=inf):
                                        est.fit(Y, T, X, W, inference=inf)

                                        # the marginal_effect and effect methods must be callable
                                        const_marg_eff = est.const_marginal_effect(
                                            X)
                                        marg_eff = est.marginal_effect(T, X)
                                        self.assertEqual(
                                            shape(marg_eff),
                                            marginal_effect_shape)
                                        self.assertEqual(
                                            shape(const_marg_eff),
                                            const_marginal_effect_shape)

                                        if d_x:
                                            comparable = marg_eff
                                        else:
                                            comparable = marg_eff[0:1]
                                        np.testing.assert_array_equal(
                                            comparable, const_marg_eff)

                                        if is_discrete:
                                            T0 = np.full_like(T, 'a')
                                        else:
                                            T0 = np.zeros_like(T)
                                        eff = est.effect(X, T0=T0, T1=T)
                                        self.assertEqual(
                                            shape(eff), effect_shape)

                                        if inf is not None:
                                            const_marg_eff_int = est.const_marginal_effect_interval(
                                                X)
                                            marg_eff_int = est.marginal_effect_interval(
                                                T, X)
                                            self.assertEqual(
                                                shape(marg_eff_int),
                                                (2, ) + marginal_effect_shape)
                                            self.assertEqual(
                                                shape(const_marg_eff_int),
                                                (2, ) +
                                                const_marginal_effect_shape)
                                            eff_int = est.effect_interval(
                                                X, T0=T0, T1=T)
                                            self.assertEqual(
                                                shape(eff_int),
                                                (2, ) + effect_shape)

                                        est.score(Y, T, X, W)

                                        # effect must be callable with implied scalar treatments, no matter the
                                        # dimensions of T, and must warn when there are multiple treatments
                                        if d_t > 1:
                                            cm = self.assertWarns(Warning)
                                        else:
                                            # ExitStack can be used as a "do nothing" ContextManager
                                            cm = ExitStack()
                                        with cm:
                                            if is_discrete:
                                                eff = est.effect(
                                                    X, T0='a', T1='b')
                                            else:
                                                eff = est.effect(X)
                                            self.assertEqual(
                                                shape(eff), effect_shape2)
Ejemplo n.º 14
0
    def test_cate_api(self):
        """Test that we correctly implement the CATE API."""
        n = 20

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = (n, d) if d > 0 else (n,)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every element
                    _, counts = np.unique(arr, return_counts=True)
                    if len(counts) == 3 and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        for d_y in [0, 1]:
            is_discrete = True
            for d_t in [0, 1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [make_random(is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]

                        if (X is None) and (W is None):
                            continue
                        d_t_final = 2 if is_discrete else d_t

                        effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                        effect_summaryframe_shape = (
                            n * (d_y if d_y > 0 else 1), 6)
                        marginal_effect_shape = ((n,) +
                                                 ((d_y,) if d_y > 0 else ()) +
                                                 ((d_t_final,) if d_t_final > 0 else ()))
                        marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1),
                                                              6 * (d_t_final if d_t_final > 0 else 1))

                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = ((n if d_x else 1,) +
                                                       ((d_y,) if d_y > 0 else ()) +
                                                       ((d_t_final,) if d_t_final > 0 else()))
                        const_marginal_effect_summaryframe_shape = (
                            (n if d_x else 1) * (d_y if d_y > 0 else 1),
                            6 * (d_t_final if d_t_final > 0 else 1))

                        for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                                                        multi_class='auto')),
                                    DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
                                              model_regression=LinearRegression(),
                                              model_final=StatsModelsLinearRegression(),
                                              multitask_model_final=True)]:

                            # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                            infs = [None]
                            if isinstance(est, LinearDRLearner):
                                infs.append('statsmodels')

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    est.fit(Y, T, X, W, inference=inf)
                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(
                                        X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(
                                        shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(
                                        shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(
                                        marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                    T0 = np.full_like(T, 'a')
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)
                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(
                                            X)
                                        marg_eff_int = est.marginal_effect_interval(
                                            T, X)
                                        const_marg_effect_inf = est.const_marginal_effect_inference(
                                            X)
                                        T1 = np.full_like(T, 'b')
                                        effect_inf = est.effect_inference(
                                            X, T0=T0, T1=T1)
                                        marg_effect_inf = est.marginal_effect_inference(
                                            T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2,) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2,) + effect_shape)

                                        # test const marginal inference
                                        self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                         const_marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.var),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                         (2,) + const_marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(const_marg_effect_inf.conf_int()
                                                                             [0], const_marg_eff_int[0], decimal=5)
                                        const_marg_effect_inf.population_summary()._repr_html_()

                                        # test effect inference
                                        self.assertEqual(shape(effect_inf.summary_frame()),
                                                         effect_summaryframe_shape)
                                        self.assertEqual(shape(effect_inf.point_estimate),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.stderr),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.var),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.pvalue()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.zstat()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.conf_int()),
                                                         (2,) + effect_shape)
                                        np.testing.assert_array_almost_equal(effect_inf.conf_int()
                                                                             [0], est.effect_interval(
                                                                                 X, T0=T0, T1=T1)
                                                                             [0], decimal=5)
                                        effect_inf.population_summary()._repr_html_()

                                        # test marginal effect inference
                                        self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                         marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.stderr),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.var),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.zstat()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                         (2,) + marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(marg_effect_inf.conf_int()
                                                                             [0], marg_eff_int[0], decimal=5)
                                        marg_effect_inf.population_summary()._repr_html_()

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments, no matter the
                                    # dimensions of T, and also that we warn when there are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager
                                    with cm:
                                        effect_shape2 = (
                                            n if d_x else 1,) + ((d_y,) if d_y > 0 else())
                                        eff = est.effect(X, T0='a', T1='b')
                                        self.assertEqual(
                                            shape(eff), effect_shape2)
Ejemplo n.º 15
0
    def test_cate_api_nonparam(self):
        """Check the shapes returned by the CATE API of the non-parametric DML estimators.

        Each combination of treatment, outcome, feature and control dimensions listed
        below is fitted with every estimator / inference option, and the shapes of the
        effect, marginal-effect and interval outputs are compared with the expected
        ones.  Fitting without X is expected to raise AttributeError.
        """
        n = 20

        def make_random(is_discrete, d):
            # None -> no array at all; negative d -> vector of length n; otherwise an (n, d) matrix
            if d is None:
                return None
            sz = (n, ) if d < 0 else (n, d)
            if not is_discrete:
                return np.random.normal(size=sz)
            while True:
                arr = np.random.choice(['a', 'b'], size=sz)
                # redraw until both labels appear and the rarer one occurs more than twice
                _, counts = np.unique(arr, return_counts=True)
                if len(counts) == 2 and counts.min() > 2:
                    return arr

        def extra_axis(d):
            # a positive dimension contributes one trailing axis, anything else contributes none
            return (d, ) if d > 0 else ()

        for d_t in [1, -1]:
            discrete_choices = [True, False] if d_t <= 1 else [False]
            for is_discrete in discrete_choices:
                for d_y in [3, 1, -1]:
                    for d_x in [2, None]:
                        for d_w in [2, None]:
                            # only the treatment may be discrete; the draw order is W, X, Y, T
                            W = make_random(False, d_w)
                            X = make_random(False, d_x)
                            Y = make_random(False, d_y)
                            T = make_random(is_discrete, d_t)

                            d_t_final = 1 if is_discrete else d_t
                            y_axes = extra_axis(d_y)
                            t_axes = extra_axis(d_t_final)

                            effect_shape = (n, ) + y_axes
                            marginal_effect_shape = (n, ) + y_axes + t_axes

                            # T is not an argument of const_marginal_effect, so a missing X
                            # leaves a single row
                            n_rows = n if d_x else 1
                            const_marginal_effect_shape = (n_rows, ) + y_axes + t_axes

                            if is_discrete:
                                model_t = LogisticRegression()
                            else:
                                model_t = WeightedLasso()

                            # TODO Add bootstrap inference, once discrete treatment issue is fixed
                            if is_discrete:
                                base_infs = [None]
                            else:
                                base_infs = [None, BootstrapInference(2)]

                            # (estimator, supports multi-dimensional Y, inference options)
                            candidates = [
                                (NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                                          model_t=model_t,
                                                          model_final=WeightedLasso(),
                                                          featurizer=None,
                                                          discrete_treatment=is_discrete),
                                 True,
                                 base_infs),
                                (NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                                          model_t=model_t,
                                                          model_final=WeightedLasso(),
                                                          featurizer=FunctionTransformer(),
                                                          discrete_treatment=is_discrete),
                                 True,
                                 base_infs),
                                (ForestDMLCateEstimator(model_y=WeightedLasso(),
                                                        model_t=model_t,
                                                        discrete_treatment=is_discrete),
                                 True,
                                 base_infs + ['blb']),
                            ]

                            for est, multi, infs in candidates:
                                if d_y > 1 and not multi:
                                    continue

                                for inf in infs:
                                    with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                      is_discrete=is_discrete, est=est, inf=inf):
                                        if X is None:
                                            # fitting without X must fail with AttributeError
                                            with pytest.raises(AttributeError):
                                                est.fit(Y, T, X, W, inference=inf)
                                            continue

                                        est.fit(Y, T, X, W, inference=inf)

                                        # the marginal_effect and effect methods must be callable
                                        const_marg_eff = est.const_marginal_effect(X)
                                        marg_eff = est.marginal_effect(T, X)
                                        self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff),
                                                         const_marginal_effect_shape)

                                        comparable = marg_eff if d_x else marg_eff[0:1]
                                        np.testing.assert_array_equal(comparable, const_marg_eff)

                                        if is_discrete:
                                            T0 = np.full_like(T, 'a')
                                        else:
                                            T0 = np.zeros_like(T)
                                        eff = est.effect(X, T0=T0, T1=T)
                                        self.assertEqual(shape(eff), effect_shape)

                                        if inf is not None:
                                            # intervals stack a leading axis of size 2 on each shape
                                            const_marg_eff_int = est.const_marginal_effect_interval(X)
                                            marg_eff_int = est.marginal_effect_interval(T, X)
                                            self.assertEqual(shape(marg_eff_int),
                                                             (2, ) + marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_eff_int),
                                                             (2, ) + const_marginal_effect_shape)
                                            eff_int = est.effect_interval(X, T0=T0, T1=T)
                                            self.assertEqual(shape(eff_int), (2, ) + effect_shape)

                                        est.score(Y, T, X, W)

                                        # effect must accept implied scalar treatments whatever the
                                        # dimensions of T, and must warn for multiple treatments
                                        if d_t > 1:
                                            cm = self.assertWarns(Warning)
                                        else:
                                            # ExitStack doubles as a "do nothing" context manager
                                            cm = ExitStack()
                                        with cm:
                                            effect_shape2 = (n_rows, ) + y_axes
                                            if is_discrete:
                                                eff = est.effect(X, T0='a', T1='b')
                                            else:
                                                eff = est.effect(X)
                                            self.assertEqual(shape(eff), effect_shape2)
Ejemplo n.º 16
0
 def transform(self, X):
     """Tile the stored features with repetition counts (rows of X, 1, 1, 1).

     Requires that `fit` has been called and that X has zero columns.
     """
     assert self._is_fitted
     dims = shape(X)
     assert dims[1] == 0
     repetitions = (dims[0], 1, 1, 1)
     return np.tile(self._features, repetitions)
Ejemplo n.º 17
0
 def fit(self, X):
     """Mark the instance as fitted, check that X has zero columns, and return self."""
     # The flag is raised before the check, so it stays set even if the assertion fails.
     self._is_fitted = True
     n_columns = shape(X)[1]
     assert n_columns == 0
     return self
Ejemplo n.º 18
0
 def transform(self, X):
     """Return a single column of ones with one entry per row of X."""
     n_rows = shape(X)[0]
     return np.ones((n_rows, 1))
Ejemplo n.º 19
0
    def test_cate_api(self):
        """Fit every DRIV-style estimator over a grid of settings and verify output shapes.

        For each combination of controls, features, binary/continuous treatment and
        instrument, projection flag and featurizer, every estimator is pickled before
        and after fitting, its effect and interval outputs are checked for shape, and
        `score` is called.  When X is given, the number of CATE feature names is
        checked and `shap_values` is exercised as well.
        """
        n = 500

        def const_marg_eff_shape(n, d_x, binary_T):
            """Expected shape of the constant marginal effect."""
            rows = n if d_x else 1
            return (rows, 1) if binary_T else (rows,)

        def marg_eff_shape(n, binary_T):
            """Expected shape of the marginal effect."""
            return (n, 1) if binary_T else (n,)

        def eff_shape(n, d_x):
            """Expected shape of the effect."""
            return (n,) if d_x else (1,)

        def linear_model():
            # a fresh instance per call, so no model object is shared between arguments
            return StatsModelsLinearRegression(fit_intercept=False)

        y = np.random.normal(size=(n,))

        # parameter combinations to test
        featurizers = [None, PolynomialFeatures(degree=2, include_bias=False)]
        grid = itertools.product(
            [None, 10],      # d_w
            [None, 3],       # d_x
            [True, False],   # binary_T
            [True, False],   # binary_Z
            [True, False],   # projection
            featurizers,     # featurizer
        )

        for d_w, d_x, binary_T, binary_Z, projection, featurizer in grid:
            # fresh data for every combination, drawn in the order W, X, T, Z
            W = None if d_w is None else np.random.normal(size=(n, d_w))
            X = None if d_x is None else np.random.normal(size=(n, d_x))
            T = np.random.choice(["a", "b"], size=(n,)) if binary_T else np.random.normal(size=(n,))
            Z = np.random.choice(["c", "d"], size=(n,)) if binary_Z else np.random.normal(size=(n,))

            # expected effect sizes for this combination
            exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
            marginal_effect_shape = marg_eff_shape(n, binary_T)
            effect_shape = eff_shape(n, d_x)

            # keyword arguments common to the first four estimators
            shared = dict(projection=projection,
                          discrete_instrument=binary_Z,
                          discrete_treatment=binary_T,
                          featurizer=featurizer)

            est_list = [
                DRIV(flexible_model_effect=linear_model(),
                     model_final=linear_model(),
                     fit_cate_intercept=True,
                     **shared),
                LinearDRIV(flexible_model_effect=linear_model(),
                           fit_cate_intercept=True,
                           **shared),
                SparseLinearDRIV(flexible_model_effect=linear_model(),
                                 fit_cate_intercept=True,
                                 **shared),
                ForestDRIV(flexible_model_effect=linear_model(),
                           **shared),
            ]

            if X is None:
                # without X the last entry (ForestDRIV) is left out
                est_list = est_list[:-1]

            if binary_T and binary_Z:
                # the intent-to-treat variants are only added for binary treatment and instrument
                est_list.extend([
                    IntentToTreatDRIV(flexible_model_effect=linear_model(),
                                      fit_cate_intercept=True,
                                      featurizer=featurizer),
                    LinearIntentToTreatDRIV(flexible_model_effect=linear_model(),
                                            featurizer=featurizer),
                ])

            for est in est_list:
                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
                                  projection=projection, featurizer=featurizer, est=est):

                    # TODO: serializing/deserializing for every combination -- is this necessary?
                    # the estimator must be picklable both before and after fitting
                    pickle.dumps(est)
                    est.fit(y, T, Z=Z, X=X, W=W)
                    pickle.dumps(est)

                    # const_marginal effect is defined in LinearCateEstimator class
                    const_marg_eff = est.const_marginal_effect(X)
                    self.assertEqual(shape(const_marg_eff), exp_const_marginal_effect_shape)

                    marg_eff = est.marginal_effect(T, X)
                    self.assertEqual(shape(marg_eff), marginal_effect_shape)

                    # baseline / target treatments: labels when T is binary, numbers otherwise
                    if binary_T:
                        T0, T1 = "a", "b"
                    else:
                        T0, T1 = 0, 1
                    eff = est.effect(X, T0=T0, T1=T1)
                    self.assertEqual(shape(eff), effect_shape)

                    # inference: each interval carries a leading axis of size 2
                    const_marg_eff_int = est.const_marginal_effect_interval(X)
                    marg_eff_int = est.marginal_effect_interval(T, X)
                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
                    for interval, expected in [
                            (const_marg_eff_int, exp_const_marginal_effect_shape),
                            (marg_eff_int, marginal_effect_shape),
                            (eff_int, effect_shape)]:
                        self.assertEqual(shape(interval), (2,) + expected)

                    # scoring must run
                    est.score(y, T, Z=Z, X=X, W=W)

                    if X is not None:
                        # one CATE feature name per featurized column (or per X column without featurizer)
                        if featurizer:
                            expect_feat_len = featurizer.fit(X).n_output_features_
                        else:
                            expect_feat_len = d_x
                        self.assertEqual(len(est.cate_feature_names()), expect_feat_len)

                        # shap values must be computable
                        est.shap_values(X[:10])
Ejemplo n.º 20
0
    def test_cate_api(self):
        """Fit the DRIV-style estimators over a grid of data settings and check output shapes.

        The nested loops vary the presence of W and X, whether T and Z are binary, the
        projection flag and the featurizer.  Each estimator is pickled before and after
        fitting, and the shapes of its effect and interval outputs are asserted.
        """
        def const_marg_eff_shape(n, d_x, binary_T):
            # n rows when d_x is truthy, else one row; a binary T adds a trailing axis of size 1
            return (n if d_x else 1, ) + ((1, ) if binary_T else ())

        def marg_eff_shape(n, binary_T):
            # always n rows; a binary T adds a trailing axis of size 1
            return (n, ) + ((1, ) if binary_T else ())

        def eff_shape(n, d_x):
            # n entries when d_x is truthy, else a single entry
            return (n if d_x else 1, )

        n = 1000
        y = np.random.normal(size=(n, ))

        # NOTE: W, X, T and Z are each drawn once per value of their own loop variable and
        # are then reused by every iteration of the loops nested inside.
        for d_w in [None, 10]:
            if d_w is None:
                W = None
            else:
                W = np.random.normal(size=(n, d_w))
            for d_x in [None, 3]:
                if d_x is None:
                    X = None
                else:
                    X = np.random.normal(size=(n, d_x))
                for binary_T in [True, False]:
                    # binary treatment uses the labels "a"/"b", otherwise normal draws
                    if binary_T:
                        T = np.random.choice(["a", "b"], size=(n, ))
                    else:
                        T = np.random.normal(size=(n, ))
                    for binary_Z in [True, False]:
                        # binary instrument uses the labels "c"/"d", otherwise normal draws
                        if binary_Z:
                            Z = np.random.choice(["c", "d"], size=(n, ))
                        else:
                            Z = np.random.normal(size=(n, ))
                        for projection in [True, False]:
                            for featurizer in [
                                    None,
                                    PolynomialFeatures(degree=2,
                                                       include_bias=False),
                            ]:
                                # estimators are rebuilt for every parameter combination
                                est_list = [
                                    DRIV(
                                        flexible_model_effect=
                                        StatsModelsLinearRegression(
                                            fit_intercept=False),
                                        model_final=StatsModelsLinearRegression(
                                            fit_intercept=False),
                                        fit_cate_intercept=True,
                                        projection=projection,
                                        discrete_instrument=binary_Z,
                                        discrete_treatment=binary_T,
                                        featurizer=featurizer,
                                    ),
                                    LinearDRIV(
                                        flexible_model_effect=
                                        StatsModelsLinearRegression(
                                            fit_intercept=False),
                                        fit_cate_intercept=True,
                                        projection=projection,
                                        discrete_instrument=binary_Z,
                                        discrete_treatment=binary_T,
                                        featurizer=featurizer,
                                    ),
                                    SparseLinearDRIV(
                                        flexible_model_effect=
                                        StatsModelsLinearRegression(
                                            fit_intercept=False),
                                        fit_cate_intercept=True,
                                        projection=projection,
                                        discrete_instrument=binary_Z,
                                        discrete_treatment=binary_T,
                                        featurizer=featurizer,
                                    ),
                                    ForestDRIV(
                                        flexible_model_effect=
                                        StatsModelsLinearRegression(
                                            fit_intercept=False),
                                        projection=projection,
                                        discrete_instrument=binary_Z,
                                        discrete_treatment=binary_T,
                                        featurizer=featurizer,
                                    ),
                                ]

                                # without X the last entry (ForestDRIV) is left out
                                if X is None:
                                    est_list = est_list[:-1]

                                # the intent-to-treat variants are only added when both the
                                # treatment and the instrument are binary
                                if binary_T and binary_Z:
                                    est_list += [
                                        IntentToTreatDRIV(
                                            flexible_model_effect=
                                            StatsModelsLinearRegression(
                                                fit_intercept=False),
                                            fit_cate_intercept=True,
                                            featurizer=featurizer,
                                        ),
                                        LinearIntentToTreatDRIV(
                                            flexible_model_effect=
                                            StatsModelsLinearRegression(
                                                fit_intercept=False),
                                            featurizer=featurizer,
                                        ),
                                    ]

                                for est in est_list:
                                    # subTest records the parameter combination for a failing case
                                    with self.subTest(d_w=d_w,
                                                      d_x=d_x,
                                                      binary_T=binary_T,
                                                      binary_Z=binary_Z,
                                                      projection=projection,
                                                      featurizer=featurizer,
                                                      est=est):

                                        # ensure we can serialize unfit estimator
                                        pickle.dumps(est)

                                        est.fit(y, T, Z=Z, X=X, W=W)

                                        # ensure we can serialize fit estimator
                                        pickle.dumps(est)

                                        # expected effect size
                                        const_marginal_effect_shape = const_marg_eff_shape(
                                            n, d_x, binary_T)
                                        marginal_effect_shape = marg_eff_shape(
                                            n, binary_T)
                                        effect_shape = eff_shape(n, d_x)
                                        # test effect
                                        const_marg_eff = est.const_marginal_effect(
                                            X)
                                        self.assertEqual(
                                            shape(const_marg_eff),
                                            const_marginal_effect_shape)
                                        marg_eff = est.marginal_effect(T, X)
                                        self.assertEqual(
                                            shape(marg_eff),
                                            marginal_effect_shape)
                                        # baseline / target treatments: labels when T is binary,
                                        # numbers otherwise
                                        T0 = "a" if binary_T else 0
                                        T1 = "b" if binary_T else 1
                                        eff = est.effect(X, T0=T0, T1=T1)
                                        self.assertEqual(
                                            shape(eff), effect_shape)

                                        # test inference
                                        # (each interval is expected to carry a leading axis of size 2)
                                        const_marg_eff_int = est.const_marginal_effect_interval(
                                            X)
                                        marg_eff_int = est.marginal_effect_interval(
                                            T, X)
                                        eff_int = est.effect_interval(X,
                                                                      T0=T0,
                                                                      T1=T1)
                                        self.assertEqual(
                                            shape(const_marg_eff_int), (2, ) +
                                            const_marginal_effect_shape)
                                        self.assertEqual(
                                            shape(marg_eff_int),
                                            (2, ) + marginal_effect_shape)
                                        self.assertEqual(
                                            shape(eff_int),
                                            (2, ) + effect_shape)

                                        # test can run score
                                        est.score(y, T, Z=Z, X=X, W=W)

                                        if X is not None:
                                            # test cate_feature_names
                                            # (expected count: featurizer output columns, or d_x
                                            # when no featurizer is used)
                                            expect_feat_len = featurizer.fit(
                                                X
                                            ).n_output_features_ if featurizer else d_x
                                            self.assertEqual(
                                                len(est.cate_feature_names()),
                                                expect_feat_len)

                                            # test can run shap values
                                            # (the result is not inspected in the visible code)
                                            shap_values = est.shap_values(
                                                X[:10])