Esempio n. 1
0
    def test_all_kinds(self):
        T = [1, 0, 1, 2, 0, 2] * 5
        Y = [1, 2, 3, 4, 5, 6] * 5
        X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
        est = LinearDMLCateEstimator(n_splits=2)
        for kind in ['percentile', 'pivot', 'normal']:
            with self.subTest(kind=kind):
                inference = BootstrapInference(n_bootstrap_samples=5,
                                               bootstrap_type=kind)
                est.fit(Y, T, inference=inference)
                i = est.const_marginal_effect_interval()
                inf = est.const_marginal_effect_inference()
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                est.fit(Y, T, X=X, inference=inference)
                i = est.const_marginal_effect_interval(X)
                inf = est.const_marginal_effect_inference(X)
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                i = est.coef__interval()
                inf = est.coef__inference()
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])

                i = est.effect_interval(X)
                inf = est.effect_inference(X)
                assert i[0].shape == i[1].shape == inf.point_estimate.shape
                assert np.allclose(i[0], inf.conf_int()[0])
                assert np.allclose(i[1], inf.conf_int()[1])
Esempio n. 2
0
    def test_can_use_statsmodel_inference(self):
        """Test that we can use statsmodels to generate confidence intervals"""
        dml = LinearDMLCateEstimator(LinearRegression(),
                                     LogisticRegression(C=1000),
                                     discrete_treatment=True)
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                np.ones((8, 1)),
                inference='statsmodels')
        interval = dml.effect_interval(np.ones((9, 1)),
                                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3,
                                                    3]),
                                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2,
                                                    3]),
                                       alpha=0.05)
        point = dml.effect(np.ones((9, 1)),
                           T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                           T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.const_marginal_effect_interval(np.ones((9, 1)),
                                                      alpha=0.05)
        point = dml.const_marginal_effect(np.ones((9, 1)))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.coef__interval(alpha=0.05)
        point = dml.coef_
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width

        interval = dml.intercept__interval(alpha=0.05)
        point = dml.intercept_
        assert len(interval) == 2
        lo, hi = interval
        assert (lo <= point).all()
        assert (point <= hi).all()
        assert (lo < hi).any(
        )  # for at least some of the examples, the CI should have nonzero width
Esempio n. 3
0
    def test_dml_multi_dim_treatment_outcome(self):
        """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """
        from econml.dml import LinearDMLCateEstimator
        from econml.inference import StatsModelsInference
        np.random.seed(123)
        n = 100000
        precision = .01
        precision_int = .0001
        with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
            for d in [2, 5]:  # n_feats + n_controls
                for d_x in [2]:  # n_feats
                    for p in [1, 5]:  # n_outcomes
                        for q in [1, 5]:  # n_treatments
                            X = np.random.binomial(1, .5, size=(n, d))
                            T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1)) for _ in range(q)])

                            def true_effect(x, i):
                                return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])
                            y = np.sum((true_effect(X, i) * T[:, [i]] for i in range(q)), axis=0) + X[:, [0] * p]
                            if p == 1:
                                y = y.flatten()
                            est = LinearDMLCateEstimator(model_y=LinearRegression(),
                                                         model_t=LinearRegression(),
                                                         linear_first_stages=False)
                            est.fit(y, T, X[:, :d_x], X[:, d_x:], inference=StatsModelsInference(cov_type='nonrobust'))
                            coef = est.coef_.reshape(p, q, d_x + 1)
                            lower, upper = est.coef__interval(alpha=.001)
                            lower = lower.reshape(p, q, d_x + 1)
                            upper = upper.reshape(p, q, d_x + 1)
                            for i in range(p):
                                for j in range(q):
                                    assert np.abs(coef[i, j, 0] - 10 * i - j) < precision, (coef[i, j, 0], 10 * i + j)
                                    assert ((lower[i, j, 0] <= 10 * i + j + precision_int) &
                                            (upper[i, j, 0] >= 10 * i + j - precision_int)),\
                                        (lower[i, j, 0], upper[i, j, 0], 10 * i + j)
                                    assert np.abs(coef[i, j, 1] - 1) < precision, (coef[i, j, 1], 1)
                                    assert ((lower[i, j, 1] <= 1 + precision_int) &
                                            (upper[i, j, 1] >= 1 - precision_int)), \
                                        (lower[i, j, 1], upper[i, j, 1])
                                    assert np.all(np.abs(coef[i, j, 2:]) < precision)
                                    assert np.all((lower[i, j, 2:] <= precision_int) &
                                                  (upper[i, j, 2:] >= -precision_int)),\
                                        (np.max(lower[i, j, 2:]),
                                         np.min(upper[i, j, 2:]))
                            XT = np.hstack([X, T])
                            (X1, X2, y1, y2,
                             X_final_first, X_final_sec, y_sum_first, y_sum_sec, n_sum_first, n_sum_sec,
                             var_first, var_sec) = _summarize(XT, y)
                            X = np.vstack([X1, X2])
                            y = np.concatenate((y1, y2))
                            X_final = np.vstack([X_final_first, X_final_sec])
                            y_sum = np.concatenate((y_sum_first, y_sum_sec))
                            n_sum = np.concatenate((n_sum_first, n_sum_sec))
                            var_sum = np.concatenate((var_first, var_sec))
                            first_half_sum = len(y_sum_first)

                            class SplitterSum:
                                def __init__(self):
                                    return

                                def split(self, X, T):
                                    return [(np.arange(0, first_half_sum), np.arange(first_half_sum, X.shape[0])),
                                            (np.arange(first_half_sum, X.shape[0]), np.arange(0, first_half_sum))]
                            est = LinearDMLCateEstimator(
                                model_y=LinearRegression(),
                                model_t=LinearRegression(),
                                n_splits=SplitterSum(),
                                linear_first_stages=False,
                                discrete_treatment=False).fit(y_sum,
                                                              X_final[:, d:],
                                                              X_final[:, :d_x],
                                                              X_final[:, d_x:d],
                                                              sample_weight=n_sum,
                                                              sample_var=var_sum,
                                                              inference=StatsModelsInference(cov_type='nonrobust'))
                            coef = est.coef_.reshape(p, q, d_x + 1)
                            lower, upper = est.coef__interval(alpha=.001)
                            lower = lower.reshape(p, q, d_x + 1)
                            upper = upper.reshape(p, q, d_x + 1)
                            for i in range(p):
                                for j in range(q):
                                    assert np.abs(coef[i, j, 0] - 10 * i - j) < precision, (coef[i, j, 0], 10 * i + j)
                                    assert ((lower[i, j, 0] <= 10 * i + j + precision_int) &
                                            (upper[i, j, 0] >= 10 * i + j - precision_int)), \
                                        (lower[i, j, 0], upper[i, j, 0], 10 * i + j)
                                    assert np.abs(coef[i, j, 1] - 1) < precision, (coef[i, j, 1], 1)
                                    assert ((lower[i, j, 1] <= 1 + precision_int) &
                                            (upper[i, j, 1] >= 1 - precision_int)), \
                                        (lower[i, j, 1], upper[i, j, 1])
                                    assert np.all(np.abs(coef[i, j, 2:]) < precision)
                                    assert np.all((lower[i, j, 2:] <= precision_int) &
                                                  (upper[i, j, 2:] >= -precision_int)), \
                                        (np.max(lower[i, j, 2:]), np.min(upper[i, j, 2:]))