Esempio n. 1
0
    def test_all_kinds(self):
        T = [1, 0, 1, 2, 0, 2] * 5
        Y = [1, 2, 3, 4, 5, 6] * 5
        X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
        est = LinearDMLCateEstimator(n_splits=2)
        for kind in ['percentile', 'pivot', 'normal']:
            with self.subTest(kind=kind):
                inference = BootstrapInference(n_bootstrap_samples=5,
                                               bootstrap_type=kind)
                est.fit(Y, T, inference=inference)
                i = est.const_marginal_effect_interval()
                inf = est.const_marginal_effect_inference()
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                est.fit(Y, T, X=X, inference=inference)
                i = est.const_marginal_effect_interval(X)
                inf = est.const_marginal_effect_inference(X)
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                i = est.coef__interval()
                inf = est.coef__inference()
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                i = est.effect_interval(X)
                inf = est.effect_inference(X)
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])
Esempio n. 2
0
    def test_can_use_statsmodel_inference(self):
        """Test that we can use statsmodels to generate confidence intervals"""
        dml = LinearDMLCateEstimator(LinearRegression(),
                                     LogisticRegression(C=1000),
                                     discrete_treatment=True)
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                np.ones((8, 1)),
                inference='statsmodels')
        interval = dml.effect_interval(np.ones((9, 1)),
                                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3,
                                                    3]),
                                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2,
                                                    3]),
                                       alpha=0.05)
        point = dml.effect(np.ones((9, 1)),
                           T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                           T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.const_marginal_effect_interval(np.ones((9, 1)),
                                                      alpha=0.05)
        point = dml.const_marginal_effect(np.ones((9, 1)))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.coef__interval(alpha=0.05)
        point = dml.coef_
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.intercept__interval(alpha=0.05)
        point = dml.intercept_
        assert len(interval) == 2
        lo, hi = interval
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width
Esempio n. 3
0
    def test_internal(self):
        """Test that the internal use of bootstrap within an estimator works."""
        x = np.random.normal(size=(1000, 2))
        t = np.random.normal(size=(1000, 1))
        t2 = np.random.normal(size=(1000, 1))
        y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

        est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
        est.fit(y, t, x, inference='bootstrap')

        # test that we can get an interval for the same attribute for the bootstrap as the original,
        # with the same shape for the lower and upper bounds
        eff = est.effect(x, T0=t, T1=t2)

        lower, upper = est.effect_interval(x, T0=t, T1=t2)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(eff), np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that the estimated effect is usually within the bounds
        assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.9

        # test that we can do the same thing once we provide alpha explicitly
        lower, upper = est.effect_interval(x, T0=t, T1=t2, alpha=0.2)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(eff), np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that the estimated effect is usually within the bounds
        assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.8