def test_all_kinds(self):
    """Check interval/inference agreement for every bootstrap type.

    For each of the 'percentile', 'pivot' and 'normal' bootstrap types the
    estimator is fitted first without and then with features ``X``.  After each
    fit, every ``*_interval`` result is compared with the matching
    ``*_inference`` result: the bounds must share the shape of the point
    estimate and be close to the bounds returned by ``conf_int()``.
    """
    outcomes = [1, 2, 3, 4, 5, 6] * 5
    treatments = [1, 0, 1, 2, 0, 2] * 5
    features = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
    est = LinearDMLCateEstimator(n_splits=2)

    def check_agreement(bounds, results):
        # Shapes of both bounds must match the point estimate ...
        assert bounds[0].shape == bounds[1].shape == results.point_estimate.shape
        # ... and each bound must agree with the inference object's interval.
        assert np.allclose(bounds[0], results.conf_int()[0])
        assert np.allclose(bounds[1], results.conf_int()[1])

    for bootstrap_type in ('percentile', 'pivot', 'normal'):
        with self.subTest(kind=bootstrap_type):
            inference = BootstrapInference(n_bootstrap_samples=5, bootstrap_type=bootstrap_type)

            # Fit without features: only the constant marginal effect is checked.
            est.fit(outcomes, treatments, inference=inference)
            check_agreement(est.const_marginal_effect_interval(),
                            est.const_marginal_effect_inference())

            # Fit with features: check marginal effect, coefficients and effect.
            est.fit(outcomes, treatments, X=features, inference=inference)
            check_agreement(est.const_marginal_effect_interval(features),
                            est.const_marginal_effect_inference(features))
            check_agreement(est.coef__interval(),
                            est.coef__inference())
            check_agreement(est.effect_interval(features),
                            est.effect_inference(features))
def test_can_use_statsmodel_inference(self):
    """Test that we can use statsmodels to generate confidence intervals"""

    def check_interval(interval, point, compare_shapes=True):
        # An interval is a (lower, upper) pair that must bracket the point estimate.
        assert len(interval) == 2
        lower, upper = interval
        if compare_shapes:
            assert lower.shape == upper.shape == point.shape
        assert (lower <= point).all()
        assert (point <= upper).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lower < upper).any()

    y_train = np.array([2, 3, 1, 3, 2, 1, 1, 1])
    t_train = np.array([3, 2, 1, 2, 3, 1, 1, 1])
    dml = LinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000), discrete_treatment=True)
    dml.fit(y_train, t_train, np.ones((8, 1)), inference='statsmodels')

    # Evaluation inputs: nine identical feature rows and every (T0, T1) pair over {1, 2, 3}.
    x_eval = np.ones((9, 1))
    t_from = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
    t_to = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])

    check_interval(dml.effect_interval(x_eval, T0=t_from, T1=t_to, alpha=0.05),
                   dml.effect(x_eval, T0=t_from, T1=t_to))
    check_interval(dml.const_marginal_effect_interval(x_eval, alpha=0.05),
                   dml.const_marginal_effect(x_eval))
    check_interval(dml.coef__interval(alpha=0.05), dml.coef_)
    # No shape comparison is made for the intercept.
    check_interval(dml.intercept__interval(alpha=0.05), dml.intercept_, compare_shapes=False)
def test_dml_multi_dim_treatment_outcome(self):
    """
    Testing that the summarized and unsummarized version of DML gives the correct (known results).

    For every combination of total column count ``d``, outcome count ``p`` and
    treatment count ``q``, binary data is simulated so that treatment ``j``
    contributes ``X[:, 0] + 10 * i + j`` to outcome ``i``.  The estimator is
    fitted once on the raw samples and once on the arrays returned by
    ``_summarize(XT, y)`` (counts passed as ``sample_weight``, variances as
    ``sample_var``).  After each fit the coefficients and their
    ``alpha=.001`` intervals are compared with the known values.
    """
    from econml.dml import LinearDMLCateEstimator
    from econml.inference import StatsModelsInference

    np.random.seed(123)
    n = 100000
    precision = .01  # tolerance on point estimates
    precision_int = .0001  # slack allowed when checking interval coverage

    def true_effect(x, i, p):
        # Column t holds the effect of treatment i on outcome t.
        return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])

    def check_coefficients(est, p, q, d_x):
        # Compare fitted coefficients and intervals with the simulated truth.
        shape = (p, q, d_x + 1)
        coef = est.coef_.reshape(shape)
        lower, upper = (bound.reshape(shape) for bound in est.coef__interval(alpha=.001))
        for i in range(p):
            for j in range(q):
                expected = 10 * i + j
                # Index 0: constant part of the effect.
                assert np.abs(coef[i, j, 0] - expected) < precision, (coef[i, j, 0], expected)
                assert ((lower[i, j, 0] <= expected + precision_int) &
                        (upper[i, j, 0] >= expected - precision_int)), \
                    (lower[i, j, 0], upper[i, j, 0], expected)
                # Index 1: coefficient on the first feature, which is 1 by construction.
                assert np.abs(coef[i, j, 1] - 1) < precision, (coef[i, j, 1], 1)
                assert ((lower[i, j, 1] <= 1 + precision_int) &
                        (upper[i, j, 1] >= 1 - precision_int)), \
                    (lower[i, j, 1], upper[i, j, 1])
                # Remaining features do not enter the true effect.
                assert np.all(np.abs(coef[i, j, 2:]) < precision)
                assert np.all((lower[i, j, 2:] <= precision_int) &
                              (upper[i, j, 2:] >= -precision_int)), \
                    (np.max(lower[i, j, 2:]), np.min(upper[i, j, 2:]))

    class SplitterSum:
        """Two-fold splitter: rows before ``boundary`` form one fold, the remaining rows the other."""

        def __init__(self, boundary):
            self.boundary = boundary

        def split(self, X, T):
            return [(np.arange(0, self.boundary), np.arange(self.boundary, X.shape[0])),
                    (np.arange(self.boundary, X.shape[0]), np.arange(0, self.boundary))]

    with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
        for d in [2, 5]:  # n_feats + n_controls
            for d_x in [2]:  # n_feats
                for p in [1, 5]:  # n_outcomes
                    for q in [1, 5]:  # n_treatments
                        X = np.random.binomial(1, .5, size=(n, d))
                        T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1))
                                       for _ in range(q)])

                        # A list (not a generator) is passed to np.sum: generator input is
                        # deprecated and makes NumPy ignore ``axis``.
                        y = np.sum([true_effect(X, i, p) * T[:, [i]] for i in range(q)],
                                   axis=0) + X[:, [0] * p]
                        if p == 1:
                            y = y.flatten()

                        # Fit on the raw, unsummarized samples.
                        est = LinearDMLCateEstimator(model_y=LinearRegression(),
                                                     model_t=LinearRegression(),
                                                     linear_first_stages=False)
                        est.fit(y, T, X[:, :d_x], X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        check_coefficients(est, p, q, d_x)

                        # Fit on the summarized data; the raw halves returned by
                        # _summarize are not needed here.
                        XT = np.hstack([X, T])
                        (_, _, _, _, X_final_first, X_final_sec, y_sum_first, y_sum_sec,
                         n_sum_first, n_sum_sec, var_first, var_sec) = _summarize(XT, y)
                        X_final = np.vstack([X_final_first, X_final_sec])
                        y_sum = np.concatenate((y_sum_first, y_sum_sec))
                        n_sum = np.concatenate((n_sum_first, n_sum_sec))
                        var_sum = np.concatenate((var_first, var_sec))

                        # Cross-fit between the two summarized halves.
                        est = LinearDMLCateEstimator(
                            model_y=LinearRegression(),
                            model_t=LinearRegression(),
                            n_splits=SplitterSum(len(y_sum_first)),
                            linear_first_stages=False,
                            discrete_treatment=False).fit(y_sum,
                                                          X_final[:, d:],
                                                          X_final[:, :d_x],
                                                          X_final[:, d_x:d],
                                                          sample_weight=n_sum,
                                                          sample_var=var_sum,
                                                          inference=StatsModelsInference(cov_type='nonrobust'))
                        check_coefficients(est, p, q, d_x)