def test_all_kinds(self):
    """Check interval/inference agreement for every bootstrap type.

    For each of the 'percentile', 'pivot' and 'normal' bootstrap types, a
    ``LinearDML`` estimator is fit (first without and then with features)
    and each ``*_interval`` result is compared against the matching
    ``*_inference`` result: the bounds must share the point estimate's
    shape and be numerically close to ``conf_int()``.
    """
    treatments = [1, 0, 1, 2, 0, 2] * 5
    outcomes = [1, 2, 3, 4, 5, 6] * 5
    features = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
    est = LinearDML(n_splits=2)

    def assert_consistent(bounds, result):
        # `bounds` is the (lower, upper) pair returned by an *_interval method;
        # `result` is the object returned by the matching *_inference method.
        assert bounds[0].shape == bounds[1].shape == result.point_estimate.shape
        assert np.allclose(bounds[0], result.conf_int()[0])
        assert np.allclose(bounds[1], result.conf_int()[1])

    for kind in ('percentile', 'pivot', 'normal'):
        with self.subTest(kind=kind):
            inference = BootstrapInference(n_bootstrap_samples=5, bootstrap_type=kind)

            # Fit without features: only the constant marginal effect is checked.
            est.fit(outcomes, treatments, inference=inference)
            assert_consistent(est.const_marginal_effect_interval(),
                              est.const_marginal_effect_inference())

            # Refit with features and check every interval/inference pair.
            est.fit(outcomes, treatments, X=features, inference=inference)
            assert_consistent(est.const_marginal_effect_interval(features),
                              est.const_marginal_effect_inference(features))
            assert_consistent(est.coef__interval(),
                              est.coef__inference())
            assert_consistent(est.effect_interval(features),
                              est.effect_inference(features))
def test_dml_multi_dim_treatment_outcome(self):
    """
    Test that DML on raw and on summarized data recovers known effects.

    For every combination of covariate count ``d`` (features + controls),
    feature count ``d_x``, outcome count ``p`` and treatment count ``q``,
    binary data are simulated such that the effect of treatment ``j`` on
    outcome ``i`` equals ``X[:, 0] + 10 * i + j``.  Three fits are verified:

    1. ``LinearDML`` on the raw data with a fitted CATE intercept;
    2. ``LinearDML`` with a degree-1 polynomial featurizer and
       ``fit_cate_intercept=False`` (the constant term then shows up as the
       first coefficient and ``intercept_`` must raise ``AttributeError``);
    3. ``LinearDML`` on the output of ``_summarize`` using sample weights
       and sample variances.

    In each case the point estimates must be within ``precision`` of the
    truth and the 99.9% intervals must cover it.
    """
    from econml.dml import LinearDML
    from econml.inference import StatsModelsInference

    np.random.seed(123)
    n = 100000
    precision = .01
    precision_int = .0001

    def true_effect(x, i, p):
        # Effect of treatment `i` on each of the `p` outcomes, one column per outcome.
        return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])

    def assert_recovers_truth(est, p, q, n_coefs, intercept_in_coef):
        """Assert that `est` recovers the simulated intercepts and coefficients.

        `n_coefs` is the number of coefficients per (outcome, treatment) pair.
        When `intercept_in_coef` is true the constant term is read from the
        first coefficient rather than from ``est.intercept_``.
        """
        coef = est.coef_.reshape(p, q, n_coefs)
        lower, upper = est.coef__interval(alpha=.001)
        lower = lower.reshape(p, q, n_coefs)
        upper = upper.reshape(p, q, n_coefs)
        if intercept_in_coef:
            intercept, lower_int, upper_int = coef[:, :, 0], lower[:, :, 0], upper[:, :, 0]
            slope = 1
        else:
            intercept = est.intercept_.reshape((p, q))
            lower_int, upper_int = est.intercept__interval(alpha=.001)
            lower_int = lower_int.reshape((p, q))
            upper_int = upper_int.reshape((p, q))
            slope = 0
        # Coefficients after the slope on X[:, 0] have a true value of zero.
        rest = slice(slope + 1, None)
        for i in range(p):
            for j in range(q):
                expected = 10 * i + j
                np.testing.assert_allclose(intercept[i, j], expected, rtol=0, atol=precision)
                np.testing.assert_array_less(lower_int[i, j], expected + precision_int)
                np.testing.assert_array_less(expected - precision_int, upper_int[i, j])
                np.testing.assert_allclose(coef[i, j, slope], 1, atol=precision)
                np.testing.assert_array_less(lower[i, j, slope], 1)
                np.testing.assert_array_less(1, upper[i, j, slope])
                zeros = np.zeros(coef[i, j, rest].shape)
                np.testing.assert_allclose(coef[i, j, rest], zeros, atol=precision)
                np.testing.assert_array_less(lower[i, j, rest], zeros + precision_int)
                np.testing.assert_array_less(zeros - precision_int, upper[i, j, rest])

    with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
        for d in [2, 5]:  # n_feats + n_controls
            for d_x in [1]:  # n_feats
                for p in [1, 5]:  # n_outcomes
                    for q in [1, 5]:  # n_treatments
                        X = np.random.binomial(1, .5, size=(n, d))
                        # Treatment probability is 0.7 when X[:, 1] == 1 and 0.3 otherwise.
                        T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1))
                                       for _ in range(q)])

                        # A list (not a generator) is passed to np.sum: summing a
                        # generator is deprecated in NumPy and ignores `axis`.
                        y = np.sum([true_effect(X, i, p) * T[:, [i]] for i in range(q)],
                                   axis=0) + X[:, [0] * p]
                        if p == 1:
                            y = y.flatten()

                        # 1) Raw data, CATE intercept fitted separately.
                        est = LinearDML(model_y=LinearRegression(),
                                        model_t=LinearRegression(),
                                        linear_first_stages=False)
                        est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        assert_recovers_truth(est, p, q, d_x, intercept_in_coef=False)

                        # 2) Raw data, constant term supplied by the featurizer instead.
                        est = LinearDML(model_y=LinearRegression(),
                                        model_t=LinearRegression(),
                                        linear_first_stages=False,
                                        featurizer=PolynomialFeatures(degree=1),
                                        fit_cate_intercept=False)
                        est.fit(y, T, X=X[:, :d_x], W=X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        with pytest.raises(AttributeError):
                            est.intercept_
                        with pytest.raises(AttributeError):
                            est.intercept__interval(alpha=0.05)
                        assert_recovers_truth(est, p, q, d_x + 1, intercept_in_coef=True)

                        # 3) Summarized data with sample weights and variances.
                        XT = np.hstack([X, T])
                        # The first four values returned by _summarize are not needed here.
                        (_, _, _, _,
                         X_final_first, X_final_sec,
                         y_sum_first, y_sum_sec,
                         n_sum_first, n_sum_sec,
                         var_first, var_sec) = _summarize(XT, y)
                        X_final = np.vstack([X_final_first, X_final_sec])
                        y_sum = np.concatenate((y_sum_first, y_sum_sec))
                        n_sum = np.concatenate((n_sum_first, n_sum_sec))
                        var_sum = np.concatenate((var_first, var_sec))
                        first_half_sum = len(y_sum_first)

                        class SplitterSum:
                            """Two-fold splitter: first summarized half vs. the remainder."""

                            def split(self, X, T):
                                first = np.arange(0, first_half_sum)
                                second = np.arange(first_half_sum, X.shape[0])
                                return [(first, second), (second, first)]

                        # Columns of X_final follow the hstack order above
                        # (features, controls, treatments) -- assumes _summarize
                        # preserves the column layout of XT.
                        est = LinearDML(
                            model_y=LinearRegression(),
                            model_t=LinearRegression(),
                            n_splits=SplitterSum(),
                            linear_first_stages=False,
                            discrete_treatment=False).fit(
                                y_sum,
                                X_final[:, d:],
                                X=X_final[:, :d_x],
                                W=X_final[:, d_x:d],
                                sample_weight=n_sum,
                                sample_var=var_sum,
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        assert_recovers_truth(est, p, q, d_x, intercept_in_coef=False)