예제 #1
    def test_all_kinds(self):
        """Check bootstrap intervals against inference results for every bootstrap kind.

        For each of the 'percentile', 'pivot' and 'normal' kinds, the estimator
        is fit first without and then with features.  After each fit, every
        ``*_interval`` method must return bounds whose shape matches the point
        estimate of the corresponding ``*_inference`` result and whose values
        are close to that result's ``conf_int()``.
        """
        T = [1, 0, 1, 2, 0, 2] * 5
        Y = [1, 2, 3, 4, 5, 6] * 5
        X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
        est = LinearDML(n_splits=2)

        def assert_consistent(interval, inference_result):
            # `interval` is indexed as (lower, upper); both bounds must line up
            # with the point estimate's shape and with conf_int().
            assert interval[0].shape == interval[1].shape == inference_result.point_estimate.shape
            conf_int = inference_result.conf_int()
            assert np.allclose(interval[0], conf_int[0])
            assert np.allclose(interval[1], conf_int[1])

        for kind in ['percentile', 'pivot', 'normal']:
            with self.subTest(kind=kind):
                # Pass the loop's kind through so that each bootstrap interval
                # type is actually exercised by this sub-test.
                inference = BootstrapInference(n_bootstrap_samples=5,
                                               bootstrap_type=kind)

                # Fit without features: only the constant marginal effect is checked.
                est.fit(Y, T, inference=inference)
                assert_consistent(est.const_marginal_effect_interval(),
                                  est.const_marginal_effect_inference())

                # Refit with features and check every interval/inference pair.
                est.fit(Y, T, X=X, inference=inference)
                assert_consistent(est.const_marginal_effect_interval(X),
                                  est.const_marginal_effect_inference(X))
                assert_consistent(est.coef__interval(), est.coef__inference())
                assert_consistent(est.effect_interval(X), est.effect_inference(X))
예제 #2
    def test_dml_multi_dim_treatment_outcome(self):
        """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """
        # NOTE(review): several statements in this excerpt are cut off (their continuation
        # lines are missing); each one is flagged below. The comments only describe what
        # the visible lines establish.
        from econml.dml import LinearDML
        from econml.inference import StatsModelsInference
        # n is the number of rows drawn for X below.
        # precision is the absolute tolerance on point estimates; precision_int is the
        # slack added to / subtracted from the expected value in the interval checks.
        # NOTE(review): no random seed is set in the visible code, so the simulated data
        # differs between runs -- confirm whether seeding happens elsewhere.
        n = 100000
        precision = .01
        precision_int = .0001
        with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
            for d in [2, 5]:  # n_feats + n_controls
                for d_x in [1]:  # n_feats
                    for p in [1, 5]:  # n_outcomes
                        for q in [1, 5]:  # n_treatments
                            # X: (n, d) binary columns. T: q binary treatment columns whose
                            # success probability is .7 where X[:, 1] == 1 and .3 otherwise.
                            X = np.random.binomial(1, .5, size=(n, d))
                            T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1)) for _ in range(q)])

                            # Effect of treatment i on each of the p outcomes: column t is
                            # x[:, 0] + 10 * t + i, i.e. constant term 10 * t + i and slope 1 on x[:, 0].
                            def true_effect(x, i):
                                return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])
                            # Outcome = sum over treatments of (effect * treatment column), plus the
                            # first column of X repeated p times.
                            # NOTE(review): np.sum receives a generator expression here; NumPy
                            # documents this form as deprecated -- confirm against the NumPy
                            # version in use.
                            y = np.sum((true_effect(X, i) * T[:, [i]] for i in range(q)), axis=0) + X[:, [0] * p]
                            if p == 1:
                                y = y.flatten()
                            # --- Case 1: unsummarized data; intercept_ and coef_ are read separately ---
                            # NOTE(review): the next two calls are cut off in this excerpt; their
                            # remaining arguments are not visible.
                            est = LinearDML(model_y=LinearRegression(),
                            est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                            intercept = est.intercept_.reshape((p, q))
                            lower_int, upper_int = est.intercept__interval(alpha=.001)
                            lower_int = lower_int.reshape((p, q))
                            upper_int = upper_int.reshape((p, q))
                            coef = est.coef_.reshape(p, q, d_x)
                            lower, upper = est.coef__interval(alpha=.001)
                            lower = lower.reshape(p, q, d_x)
                            upper = upper.reshape(p, q, d_x)
                            # Expected values follow true_effect: for outcome i and treatment j the
                            # constant term is 10 * i + j and the slope on the first feature is 1.
                            for i in range(p):
                                for j in range(q):
                                    np.testing.assert_allclose(intercept[i, j], 10 * i + j, rtol=0, atol=precision)
                                    np.testing.assert_array_less(lower_int[i, j], 10 * i + j + precision_int)
                                    np.testing.assert_array_less(10 * i + j - precision_int, upper_int[i, j])
                                    np.testing.assert_allclose(coef[i, j, 0], 1, atol=precision)
                                    np.testing.assert_array_less(lower[i, j, 0], 1)
                                    np.testing.assert_array_less(1, upper[i, j, 0])
                                    # With d_x fixed to 1 the [1:] slices below are empty, so these
                                    # three checks compare empty arrays.
                                    # NOTE(review): the assert_allclose call below is cut off in this
                                    # excerpt (its tolerance argument / closing parenthesis is missing).
                                    np.testing.assert_allclose(coef[i, j, 1:], np.zeros(coef[i, j, 1:].shape),
                                    np.testing.assert_array_less(lower[i, j, 1:],
                                                                 np.zeros(lower[i, j, 1:].shape) + precision_int)
                                    np.testing.assert_array_less(np.zeros(lower[i, j, 1:].shape) - precision_int,
                                                                 upper[i, j, 1:])

                            # --- Case 2: an estimator for which intercept_ is expected to be unavailable ---
                            # NOTE(review): the next two calls are cut off in this excerpt. The
                            # assertions below expect intercept_ / intercept__interval to raise
                            # AttributeError and coef_ to hold d_x + 1 entries per (outcome, treatment),
                            # with index 0 carrying the constant term -- presumably configured by the
                            # missing constructor arguments; verify against the full file.
                            est = LinearDML(model_y=LinearRegression(),
                            est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                            with pytest.raises(AttributeError) as e_info:
                                intercept = est.intercept_
                            with pytest.raises(AttributeError) as e_info:
                                intercept = est.intercept__interval(alpha=0.05)
                            coef = est.coef_.reshape(p, q, d_x + 1)
                            lower, upper = est.coef__interval(alpha=.001)
                            lower = lower.reshape(p, q, d_x + 1)
                            upper = upper.reshape(p, q, d_x + 1)
                            for i in range(p):
                                for j in range(q):
                                    # Index 0 is checked against the constant term, index 1 against the slope.
                                    np.testing.assert_allclose(coef[i, j, 0], 10 * i + j, rtol=0, atol=precision)
                                    np.testing.assert_array_less(lower[i, j, 0], 10 * i + j + precision_int)
                                    np.testing.assert_array_less(10 * i + j - precision_int, upper[i, j, 0])
                                    np.testing.assert_allclose(coef[i, j, 1], 1, atol=precision)
                                    np.testing.assert_array_less(lower[i, j, 1], 1)
                                    np.testing.assert_array_less(1, upper[i, j, 1])
                                    # With d_x fixed to 1 the [2:] slices below are empty.
                                    # NOTE(review): the assert_allclose call below is cut off in this excerpt.
                                    np.testing.assert_allclose(coef[i, j, 2:], np.zeros(coef[i, j, 2:].shape),
                                    np.testing.assert_array_less(lower[i, j, 2:],
                                                                 np.zeros(lower[i, j, 2:].shape) + precision_int)
                                    np.testing.assert_array_less(np.zeros(lower[i, j, 2:].shape) - precision_int,
                                                                 upper[i, j, 2:])
                            # --- Case 3: summarized data ---
                            # _summarize is defined outside this excerpt; it is unpacked into twelve
                            # arrays that are then stacked as "first" followed by "second" parts.
                            # Note that X and y are rebound here to the stacked raw parts.
                            XT = np.hstack([X, T])
                            (X1, X2, y1, y2,
                             X_final_first, X_final_sec, y_sum_first, y_sum_sec, n_sum_first, n_sum_sec,
                             var_first, var_sec) = _summarize(XT, y)
                            X = np.vstack([X1, X2])
                            y = np.concatenate((y1, y2))
                            X_final = np.vstack([X_final_first, X_final_sec])
                            y_sum = np.concatenate((y_sum_first, y_sum_sec))
                            n_sum = np.concatenate((n_sum_first, n_sum_sec))
                            var_sum = np.concatenate((var_first, var_sec))
                            # Number of summarized rows that came from the first part.
                            first_half_sum = len(y_sum_first)

                            # Fixed two-fold splitter: one pair of index arrays separates the first
                            # first_half_sum rows from the rest, the other pair is the same swapped
                            # (presumably (train, test) pairs -- confirm against how the estimator
                            # consumes the splitter).
                            class SplitterSum:
                                def __init__(self):
                                    # NOTE(review): the body of __init__ is missing from this excerpt.

                                def split(self, X, T):
                                    return [(np.arange(0, first_half_sum), np.arange(first_half_sum, X.shape[0])),
                                            (np.arange(first_half_sum, X.shape[0]), np.arange(0, first_half_sum))]
                            # NOTE(review): the statement below is cut off in this excerpt: the
                            # constructor arguments, the call receiving the X_final slices, and any
                            # trailing arguments are not visible. The visible slices split X_final's
                            # columns at d_x and d (presumably features, controls, then treatments,
                            # mirroring XT = [X, T] -- verify against _summarize).
                            est = LinearDML(
                                                              X_final[:, d:],
                                                              X_final[:, :d_x],
                                                              X_final[:, d_x:d],
                            # Same expectations as in Case 1, now for the estimator fit on summarized data.
                            intercept = est.intercept_.reshape((p, q))
                            lower_int, upper_int = est.intercept__interval(alpha=.001)
                            lower_int = lower_int.reshape((p, q))
                            upper_int = upper_int.reshape((p, q))
                            coef = est.coef_.reshape(p, q, d_x)
                            lower, upper = est.coef__interval(alpha=.001)
                            lower = lower.reshape(p, q, d_x)
                            upper = upper.reshape(p, q, d_x)
                            for i in range(p):
                                for j in range(q):
                                    np.testing.assert_allclose(intercept[i, j], 10 * i + j, rtol=0, atol=precision)
                                    np.testing.assert_array_less(lower_int[i, j], 10 * i + j + precision_int)
                                    np.testing.assert_array_less(10 * i + j - precision_int, upper_int[i, j])
                                    np.testing.assert_allclose(coef[i, j, 0], 1, atol=precision)
                                    np.testing.assert_array_less(lower[i, j, 0], 1)
                                    np.testing.assert_array_less(1, upper[i, j, 0])
                                    # With d_x fixed to 1 the [1:] slices below are empty.
                                    # NOTE(review): the assert_allclose call below is cut off in this excerpt.
                                    np.testing.assert_allclose(coef[i, j, 1:], np.zeros(coef[i, j, 1:].shape),
                                    np.testing.assert_array_less(lower[i, j, 1:],
                                                                 np.zeros(lower[i, j, 1:].shape) + precision_int)
                                    np.testing.assert_array_less(np.zeros(lower[i, j, 1:].shape) - precision_int,
                                                                 upper[i, j, 1:])