Example #1
0
    def _test_sparse(n_p, d_w, n_r):
        """Check that the sparse DML estimator recovers per-product effects.

        Builds two folds of synthetic data in which each of the ``n_p``
        products has its own treatment effect, fits
        ``SparseLinearDMLCateEstimator`` (with two different featurizers),
        and asserts that the fitted coefficients and effects match the truth.
        """
        # e_y needs at least as many rows as there are distinct columns
        # in [X;X⊗W;W⊗W;X⊗e_t] so that a solution for e_t exists
        assert n_p * n_r >= 2 * n_p + n_p * d_w + d_w * (d_w + 1) / 2

        true_effects = np.random.normal(size=(n_p,))  # one effect per product
        n = n_p * n_r
        prod_id = np.tile(range(n_p), n_r)  # product id per row

        coef_t = np.random.normal(size=(d_w + n_p,))
        coef_y = np.random.normal(size=(d_w + n_p,))

        # Pre-allocate storage for both folds
        x = np.empty((2 * n, n_p))  # product dummies
        w = np.empty((2 * n, d_w))
        y = np.empty(2 * n)
        t = np.empty(2 * n)

        for fold in range(2):
            rows = slice(fold * n, (fold + 1) * n)
            x_f = OneHotEncoder().fit_transform(np.reshape(prod_id, (-1, 1))).toarray()
            w_f = np.random.normal(size=(n, d_w))
            xw_f = hstack([x_f, w_f])
            e_t_f, e_y_f = TestDML._generate_recoverable_errors(true_effects, x_f, W=w_f)

            t_f = xw_f @ coef_t + e_t_f
            y_f = t_f * np.choose(prod_id, true_effects) + xw_f @ coef_y + e_y_f

            x[rows, :] = x_f
            w[rows, :] = w_f
            y[rows] = y_f
            t[rows] = t_f

        dml = SparseLinearDMLCateEstimator(
            LinearRegression(fit_intercept=False),
            LinearRegression(fit_intercept=False),
            featurizer=FunctionTransformer())
        dml.fit(y, t, x, w)

        # note that this would fail for the non-sparse DMLCateEstimator
        np.testing.assert_allclose(true_effects, dml.coef_.reshape(-1))
        eff = reshape(t * np.choose(np.tile(prod_id, 2), true_effects), (-1, 1))
        np.testing.assert_allclose(eff, dml.effect(0, t, x))

        # Same check through a pipeline featurizer
        dml = SparseLinearDMLCateEstimator(
            LinearRegression(fit_intercept=False),
            LinearRegression(fit_intercept=False),
            featurizer=Pipeline([("id", FunctionTransformer()),
                                 ("matrix", MatrixFeatures(1, 1))]))
        dml.fit(y, t, x, w)
        np.testing.assert_allclose(eff, dml.effect(0, t, x))
Example #2
0
 def test_linear_sparse(self):
     """SparseDML test with a sparse DGP"""
     # Sparse data-generating process: only n_nonzero coefficients are active
     np.random.seed(123)
     n_x = 50
     n_nonzero = 5
     n_w = 5
     n = 1000
     # Sparse treatment-effect coefficients
     a = np.zeros(n_x)
     a[np.random.choice(n_x, size=n_nonzero, replace=False)] = 1
     # Sparse nuisance coefficients for the treatment and outcome models
     b = np.zeros(n_x + n_w)
     g = np.zeros(n_x + n_w)
     b[np.random.choice(n_x + n_w, size=n_nonzero, replace=False)] = 1
     g[np.random.choice(n_x + n_w, size=n_nonzero, replace=False)] = 1
     # Features, controls, treatment and outcome
     x = np.random.normal(size=(n, n_x))
     w = np.random.normal(size=(n, n_w))
     xw = np.hstack([x, w])
     T = xw @ b + np.random.normal(size=n)
     Y = T * (x @ a) + xw @ g + np.random.normal(size=n, scale=0.5)
     # Fit the sparse estimator and check coef_ / intercept_
     sparse_dml = SparseLinearDMLCateEstimator(fit_cate_intercept=False)
     sparse_dml.fit(Y, T, x, w, inference='debiasedlasso')
     np.testing.assert_allclose(a, sparse_dml.coef_, atol=2e-1)
     # No intercept was fit, so accessing it should raise
     with pytest.raises(AttributeError):
         sparse_dml.intercept_
     # Treatment effects at test points (restricted to vectors of norm < 1)
     x_test = np.random.uniform(size=(10, n_x))
     true_eff = x_test @ a
     eff = sparse_dml.effect(x_test, T0=0, T1=1)
     np.testing.assert_allclose(true_eff, eff, atol=0.5)
     # Inference: a majority of true effects should lie in the 5-95% CI
     y_lower, y_upper = sparse_dml.effect_interval(x_test, T0=0, T1=1)
     in_CI = (y_lower < true_eff) & (true_eff < y_upper)
     self.assertTrue(in_CI.mean() > 0.8)