Example 1
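The methods in these examples are test cases lifted out of econml's test suite, so they assume a surrounding unittest.TestCase class and a common set of imports. A minimal sketch of that setup (the module paths are an assumption, matching the older econml releases that still expose LinearDMLCateEstimator):

# Assumed setup, not part of the original snippets; paths follow older econml releases.
import unittest

import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import FunctionTransformer

from econml.bootstrap import BootstrapEstimator   # assumed module location
from econml.dml import LinearDMLCateEstimator


class TestEstimators(unittest.TestCase):          # hypothetical container class
    ...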
    def test_with_econml(self):
        """Test that we can bootstrap econml estimators."""
        x = np.random.normal(size=(1000, 2))
        t = np.random.normal(size=(1000, 1))
        t2 = np.random.normal(size=(1000, 1))
        y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

        est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
        est.fit(y, t, x)

        bs = BootstrapEstimator(est, 50)
        # test that we can fit with the same arguments as the base estimator
        bs.fit(y, t, x)

        # test that we can get the same attribute for the bootstrap as the original, with the same shape
        self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_))

        # test that we can get an interval for the same attribute for the bootstrap as the original,
        # with the same shape for the lower and upper bounds
        lower, upper = bs.coef__interval()
        for bound in [lower, upper]:
            self.assertEqual(np.shape(est.coef_), np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that we can do the same thing once we provide percentile bounds
        lower, upper = bs.coef__interval(lower=10, upper=90)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(est.coef_), np.shape(bound))

        # test that we can do the same thing with the results of a method, rather than an attribute
        self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)),
                         np.shape(bs.effect(x, T0=t, T1=t2)))

        # test that we can get an interval for the method's results as well,
        # with the same shape for the lower and upper bounds
        lower, upper = bs.effect_interval(x, T0=t, T1=t2)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)),
                             np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that we can do the same thing once we provide percentile bounds
        lower, upper = bs.effect_interval(x, T0=t, T1=t2, lower=10, upper=90)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)),
                             np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()
Example 2
    def test_can_use_statsmodel_inference(self):
        """Test that we can use statsmodels to generate confidence intervals"""
        dml = LinearDMLCateEstimator(LinearRegression(),
                                     LogisticRegression(C=1000),
                                     discrete_treatment=True)
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                np.ones((8, 1)),
                inference='statsmodels')
        interval = dml.effect_interval(np.ones((9, 1)),
                                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
                                       alpha=0.05)
        point = dml.effect(np.ones((9, 1)),
                           T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                           T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

        interval = dml.const_marginal_effect_interval(np.ones((9, 1)),
                                                      alpha=0.05)
        point = dml.const_marginal_effect(np.ones((9, 1)))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

        interval = dml.coef__interval(alpha=0.05)
        point = dml.coef_
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

        interval = dml.intercept__interval(alpha=0.05)
        point = dml.intercept_
        assert len(interval) == 2
        lo, hi = interval
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()
Example 3
    def test_discrete_treatments(self):
        """Test that we can use discrete treatments."""
        dml = LinearDMLCateEstimator(LinearRegression(),
                                     LogisticRegression(C=1000),
                                     featurizer=FunctionTransformer(),
                                     discrete_treatment=True)
        # create a simple artificial setup where the effect of moving from treatment
        #     1 -> 2 is 2,
        #     1 -> 3 is 1, and
        #     2 -> 3 is -1 (necessarily, by composing the previous two effects)
        # Using an uneven number of examples from the different classes,
        # and having the treatments in non-lexicographic order,
        # should rule out some basic issues.
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
        np.testing.assert_almost_equal(
            dml.effect(np.ones((9, 1)),
                       np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                       np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])),
            [0, 2, 1, -2, 0, -1, -1, 1, 0])
        dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                  np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
Example 4
    def test_internal(self):
        """Test that the internal use of bootstrap within an estimator works."""
        x = np.random.normal(size=(1000, 2))
        t = np.random.normal(size=(1000, 1))
        t2 = np.random.normal(size=(1000, 1))
        y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

        est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
        est.fit(y, t, x, inference='bootstrap')

        # test that we can get an interval for the same attribute for the bootstrap as the original,
        # with the same shape for the lower and upper bounds
        eff = est.effect(x, T0=t, T1=t2)

        lower, upper = est.effect_interval(x, T0=t, T1=t2)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(eff), np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that the estimated effect is usually within the bounds
        assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.9

        # test that we can do the same thing once we provide alpha explicitly
        lower, upper = est.effect_interval(x, T0=t, T1=t2, alpha=0.2)
        for bound in [lower, upper]:
            self.assertEqual(np.shape(eff), np.shape(bound))

        # test that the lower and upper bounds differ
        assert (lower <= upper).all()
        assert (lower < upper).any()

        # test that the estimated effect is usually within the bounds
        assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.8
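For reference, here is a minimal standalone sketch (not taken from the test suite) of the same fit / effect / effect_interval workflow that Example 4 exercises, under the same assumed imports:

# Hypothetical usage outside a test harness, mirroring Example 4.
import numpy as np
from sklearn.linear_model import LinearRegression
from econml.dml import LinearDMLCateEstimator   # assumed: older econml API

x = np.random.normal(size=(1000, 2))
t = np.random.normal(size=(1000, 1))
y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
est.fit(y, t, x, inference='bootstrap')   # bootstrap-based confidence intervals

# effect of moving every unit from treatment 0 to treatment 1, with 80% bounds
point = est.effect(x, T0=np.zeros_like(t), T1=np.ones_like(t))
lower, upper = est.effect_interval(x, T0=np.zeros_like(t), T1=np.ones_like(t), alpha=0.2)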