def test_can_use_vectors(self):
    """Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
    dml = LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
    self.assertAlmostEqual(dml.coef_.reshape(())[()], 1)
def test_can_use_sample_weights(self):
    """Test that we can pass sample weights to an estimator."""
    dml = LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)),
            sample_weight=np.ones((6,)))
    self.assertAlmostEqual(dml.coef_.reshape(())[()], 1)
def test_can_use_statsmodel_inference(self):
    """Test that we can use statsmodels to generate confidence intervals"""
    dml = LinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000), discrete_treatment=True)
    dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)),
            inference='statsmodels')

    interval = dml.effect_interval(np.ones((9, 1)),
                                   T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                                   T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
                                   alpha=0.05)
    point = dml.effect(np.ones((9, 1)),
                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]))
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width

    interval = dml.const_marginal_effect_interval(np.ones((9, 1)), alpha=0.05)
    point = dml.const_marginal_effect(np.ones((9, 1)))
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width

    interval = dml.coef__interval(alpha=0.05)
    point = dml.coef_
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width

    interval = dml.intercept__interval(alpha=0.05)
    point = dml.intercept_
    assert len(interval) == 2
    lo, hi = interval
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width
def test_discrete_treatments(self):
    """Test that we can use discrete treatments"""
    dmls = [
        LinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000),
                               fit_cate_intercept=False, discrete_treatment=True),
        SparseLinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000),
                                     fit_cate_intercept=False, discrete_treatment=True)
    ]
    for dml in dmls:
        # create a simple artificial setup where effect of moving from treatment
        #     1 -> 2 is 2,
        #     1 -> 3 is 1, and
        #     2 -> 3 is -1 (necessarily, by composing the previous two effects)
        # Using an uneven number of examples from different classes,
        # and having the treatments in non-lexicographic order,
        # should rule out some basic issues.
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
        np.testing.assert_almost_equal(
            dml.effect(np.ones((9, 1)),
                       T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                       T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])),
            [0, 2, 1, -2, 0, -1, -1, 1, 0],
            decimal=2)
        dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
def test_can_assign_treatment(self):
    n = 100
    X = np.random.normal(size=(n, 4))
    T = np.random.binomial(1, 0.5, size=(n,))
    Y = np.random.normal(size=(n,))
    est = LinearDMLCateEstimator(discrete_treatment=True)
    est.fit(Y, T, X)

    # can interpret without uncertainty
    intrp = SingleTreePolicyInterpreter()
    with self.assertRaises(Exception):
        # can't treat before interpreting
        intrp.treat(X)
    intrp.interpret(est, X)
    T_policy = intrp.treat(X)
    assert T.shape == T_policy.shape
def test_with_econml(self):
    """Test that we can bootstrap econml estimators."""
    x = np.random.normal(size=(1000, 2))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

    est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
    est.fit(y, t, x)

    bs = BootstrapEstimator(est, 50)
    # test that we can fit with the same arguments as the base estimator
    bs.fit(y, t, x)

    # test that we can get the same attribute for the bootstrap as the original, with the same shape
    self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_))

    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    lower, upper = bs.coef__interval()
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.coef_), np.shape(bound))

    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()

    # test that we can do the same thing once we provide percentile bounds
    lower, upper = bs.coef__interval(lower=10, upper=90)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.coef_), np.shape(bound))

    # test that we can do the same thing with the results of a method, rather than an attribute
    self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bs.effect(x, T0=t, T1=t2)))

    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    lower, upper = bs.effect_interval(x, T0=t, T1=t2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bound))

    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()

    # test that we can do the same thing once we provide percentile bounds
    lower, upper = bs.effect_interval(x, T0=t, T1=t2, lower=10, upper=90)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, T0=t, T1=t2)), np.shape(bound))

    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()
def test_can_summarize(self):
    LinearDMLCateEstimator().fit(
        TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
        inference='statsmodels').summary()

    LinearDRLearner(fit_cate_intercept=False).fit(
        TestInference.Y, TestInference.T > 0, TestInference.X, TestInference.W,
        inference=BootstrapInference(5)).summary(1)
def test_can_use_interpreters(self):
    n = 100
    for t_shape in [(n,), (n, 1)]:
        for y_shape in [(n,), (n, 1)]:
            X = np.random.normal(size=(n, 4))
            T = np.random.binomial(1, 0.5, size=t_shape)
            Y = np.random.normal(size=y_shape)
            est = LinearDMLCateEstimator(discrete_treatment=True)
            est.fit(Y, T, X)
            for intrp in [SingleTreeCateInterpreter(), SingleTreePolicyInterpreter()]:
                with self.subTest(t_shape=t_shape, y_shape=y_shape, intrp=intrp):
                    with self.assertRaises(Exception):
                        # prior to calling interpret, can't plot, render, etc.
                        intrp.plot()
                    intrp.interpret(est, X)
                    intrp.plot()
                    intrp.render('tmp.pdf', view=False)
                    intrp.export_graphviz()
def test_can_custom_splitter(self):
    # test that we can fit with a KFold instance
    dml = LinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000),
                                 discrete_treatment=True, n_splits=KFold())
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))

    # test that we can fit with a train/test iterable
    dml = LinearDMLCateEstimator(LinearRegression(), LogisticRegression(C=1000),
                                 discrete_treatment=True, n_splits=[([0, 1, 2], [3, 4, 5])])
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
def test_inference_results(self):
    """Tests the inference results summary."""
    # Test inference results when `cate_feature_names` does not exist
    cate_est = LinearDMLCateEstimator(featurizer=PolynomialFeatures(degree=1, include_bias=False))
    wrapped_est = self._NoFeatNamesEst(cate_est)
    wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                    inference='statsmodels')
    summary_results = wrapped_est.summary()
    coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
    np.testing.assert_array_equal(coef_rows, ['X{}'.format(i) for i in range(TestInference.d_x)])
def test_can_use_sample_weights(self):
    """Test that we can pass sample weights to an estimator."""
    dmls = [
        LinearDMLCateEstimator(LinearRegression(), 'auto', fit_cate_intercept=False),
        SparseLinearDMLCateEstimator(LinearRegression(), 'auto', fit_cate_intercept=False)
    ]
    for dml in dmls:
        dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)),
                sample_weight=np.ones((6,)))
        self.assertAlmostEqual(dml.coef_.reshape(())[()], 1)
def test_can_summarize(self):
    LinearDMLCateEstimator(model_t=LinearRegression(), model_y=LinearRegression()).fit(
        TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
        inference='statsmodels').summary()

    LinearDRLearner(model_regression=LinearRegression(),
                    model_propensity=LogisticRegression(),
                    fit_cate_intercept=False).fit(
        TestInference.Y, TestInference.T > 0, TestInference.X, TestInference.W,
        inference=BootstrapInference(5)).summary(1)
def test_can_use_vectors(self):
    """Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
    dmls = [
        LinearDMLCateEstimator(LinearRegression(), LinearRegression(), fit_cate_intercept=False),
        SparseLinearDMLCateEstimator(LinearRegression(), LinearRegression(), fit_cate_intercept=False)
    ]
    for dml in dmls:
        dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
        self.assertAlmostEqual(dml.coef_.reshape(())[()], 1)
        score = dml.score(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
        self.assertAlmostEqual(score, 0)
def test_bad_splits_discrete(self):
    """
    Tests that when some training splits in a crossfit fold don't contain
    all treatments then an error is raised.
    """
    Y = np.array([2, 3, 1, 3, 2, 1, 1, 1])
    T = np.array([2, 2, 1, 2, 1, 1, 1, 1])
    X = np.ones((8, 1))
    est = LinearDMLCateEstimator(n_splits=[(np.arange(4, 8), np.arange(4))], discrete_treatment=True)
    with pytest.raises(AttributeError):
        est.fit(Y, T, X)

    Y = np.array([2, 3, 1, 3, 2, 1, 1, 1])
    T = np.array([2, 2, 1, 2, 2, 2, 2, 2])
    X = np.ones((8, 1))
    est = LinearDMLCateEstimator(n_splits=[(np.arange(4, 8), np.arange(4))], discrete_treatment=True)
    with pytest.raises(AttributeError):
        est.fit(Y, T, X)
def test_cate_uncertainty_needs_inference(self):
    n = 100
    X = np.random.normal(size=(n, 4))
    T = np.random.binomial(1, 0.5, size=(n,))
    Y = np.random.normal(size=(n,))
    est = LinearDMLCateEstimator(discrete_treatment=True)
    est.fit(Y, T, X)

    # can interpret without uncertainty
    intrp = SingleTreeCateInterpreter()
    intrp.interpret(est, X)

    intrp = SingleTreeCateInterpreter(include_model_uncertainty=True)
    with self.assertRaises(Exception):
        # can't interpret with uncertainty if inference wasn't used during fit
        intrp.interpret(est, X)

    # can interpret with uncertainty if we refit
    est.fit(Y, T, X, inference='statsmodels')
    intrp.interpret(est, X)
def test_internal(self):
    """Test that the internal use of bootstrap within an estimator works."""
    x = np.random.normal(size=(1000, 2))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

    est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
    est.fit(y, t, x, inference='bootstrap')

    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    eff = est.effect(x, T0=t, T1=t2)
    lower, upper = est.effect_interval(x, T0=t, T1=t2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(eff), np.shape(bound))

    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()

    # test that the estimated effect is usually within the bounds
    assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.9

    # test that we can do the same thing once we provide alpha explicitly
    lower, upper = est.effect_interval(x, T0=t, T1=t2, alpha=0.2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(eff), np.shape(bound))

    # test that the lower and upper bounds differ
    assert (lower <= upper).all()
    assert (lower < upper).any()

    # test that the estimated effect is usually within the bounds
    assert np.mean(np.logical_and(lower <= eff, eff <= upper)) >= 0.8
def test_cate_api(self):
    """Test that we correctly implement the CATE API."""
    n = 20

    def make_random(is_discrete, d):
        if d is None:
            return None
        sz = (n, d) if d >= 0 else (n,)
        if is_discrete:
            while True:
                arr = np.random.choice(['a', 'b', 'c'], size=sz)
                # ensure that we've got at least two of every element
                _, counts = np.unique(arr, return_counts=True)
                if len(counts) == 3 and counts.min() > 1:
                    return arr
        else:
            return np.random.normal(size=sz)

    for d_t in [2, 1, -1]:
        for is_discrete in [True, False] if d_t <= 1 else [False]:
            for d_y in [3, 1, -1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [make_random(is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]

                        d_t_final = 2 if is_discrete else d_t

                        effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                        marginal_effect_shape = ((n,) +
                                                 ((d_y,) if d_y > 0 else ()) +
                                                 ((d_t_final,) if d_t_final > 0 else ()))

                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = ((n if d_x else 1,) +
                                                       ((d_y,) if d_y > 0 else ()) +
                                                       ((d_t_final,) if d_t_final > 0 else ()))

                        model_t = LogisticRegression() if is_discrete else Lasso()

                        # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                        all_infs = [None, 'statsmodels']
                        if not is_discrete:
                            all_infs.append(BootstrapInference(1))

                        for est, multi, infs in [(LinearDMLCateEstimator(model_y=Lasso(),
                                                                         model_t='auto',
                                                                         discrete_treatment=is_discrete),
                                                  False,
                                                  all_infs),
                                                 (SparseLinearDMLCateEstimator(model_y=LinearRegression(),
                                                                               model_t=model_t,
                                                                               discrete_treatment=is_discrete),
                                                  True,
                                                  [None]),
                                                 (KernelDMLCateEstimator(model_y=LinearRegression(),
                                                                         model_t=model_t,
                                                                         discrete_treatment=is_discrete),
                                                  False,
                                                  [None])]:

                            if not (multi) and d_y > 1:
                                continue

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    est.fit(Y, T, X, W, inference=inf)
                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(
                                        marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                    T0 = np.full_like(T, 'a') if is_discrete else np.zeros_like(T)
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)

                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2,) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2,) + effect_shape)

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments, no matter the
                                    # dimensions of T, and also that we warn when there are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager

                                    with cm:
                                        effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else ())
                                        eff = est.effect(X) if not is_discrete else est.effect(X, T0='a', T1='b')
                                        self.assertEqual(shape(eff), effect_shape2)
def test_dml_multi_dim_treatment_outcome(self):
    """
    Test that the summarized and unsummarized versions of DML give the correct (known) results.
    """
    from econml.dml import LinearDMLCateEstimator
    from econml.inference import StatsModelsInference
    np.random.seed(123)
    n = 100000
    precision = .01
    precision_int = .0001
    with np.printoptions(formatter={'float': '{:.4f}'.format}, suppress=True):
        for d in [2, 5]:  # n_feats + n_controls
            for d_x in [2]:  # n_feats
                for p in [1, 5]:  # n_outcomes
                    for q in [1, 5]:  # n_treatments
                        X = np.random.binomial(1, .5, size=(n, d))
                        T = np.hstack([np.random.binomial(1, .5 + .2 * (2 * X[:, [1]] - 1))
                                       for _ in range(q)])

                        def true_effect(x, i):
                            return np.hstack([x[:, [0]] + 10 * t + i for t in range(p)])

                        y = np.sum((true_effect(X, i) * T[:, [i]] for i in range(q)), axis=0) + X[:, [0] * p]
                        if p == 1:
                            y = y.flatten()

                        est = LinearDMLCateEstimator(model_y=LinearRegression(),
                                                     model_t=LinearRegression(),
                                                     linear_first_stages=False)
                        est.fit(y, T, X[:, :d_x], X[:, d_x:],
                                inference=StatsModelsInference(cov_type='nonrobust'))
                        coef = est.coef_.reshape(p, q, d_x + 1)
                        lower, upper = est.coef__interval(alpha=.001)
                        lower = lower.reshape(p, q, d_x + 1)
                        upper = upper.reshape(p, q, d_x + 1)
                        for i in range(p):
                            for j in range(q):
                                assert np.abs(coef[i, j, 0] - 10 * i - j) < precision, \
                                    (coef[i, j, 0], 10 * i + j)
                                assert ((lower[i, j, 0] <= 10 * i + j + precision_int) &
                                        (upper[i, j, 0] >= 10 * i + j - precision_int)), \
                                    (lower[i, j, 0], upper[i, j, 0], 10 * i + j)
                                assert np.abs(coef[i, j, 1] - 1) < precision, (coef[i, j, 1], 1)
                                assert ((lower[i, j, 1] <= 1 + precision_int) &
                                        (upper[i, j, 1] >= 1 - precision_int)), \
                                    (lower[i, j, 1], upper[i, j, 1])
                                assert np.all(np.abs(coef[i, j, 2:]) < precision)
                                assert np.all((lower[i, j, 2:] <= precision_int) &
                                              (upper[i, j, 2:] >= -precision_int)), \
                                    (np.max(lower[i, j, 2:]), np.min(upper[i, j, 2:]))

                        XT = np.hstack([X, T])
                        (X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,
                         n_sum_first, n_sum_sec, var_first, var_sec) = _summarize(XT, y)
                        X = np.vstack([X1, X2])
                        y = np.concatenate((y1, y2))
                        X_final = np.vstack([X_final_first, X_final_sec])
                        y_sum = np.concatenate((y_sum_first, y_sum_sec))
                        n_sum = np.concatenate((n_sum_first, n_sum_sec))
                        var_sum = np.concatenate((var_first, var_sec))
                        first_half_sum = len(y_sum_first)

                        class SplitterSum:
                            def __init__(self):
                                return

                            def split(self, X, T):
                                return [(np.arange(0, first_half_sum),
                                         np.arange(first_half_sum, X.shape[0])),
                                        (np.arange(first_half_sum, X.shape[0]),
                                         np.arange(0, first_half_sum))]

                        est = LinearDMLCateEstimator(
                            model_y=LinearRegression(),
                            model_t=LinearRegression(),
                            n_splits=SplitterSum(),
                            linear_first_stages=False,
                            discrete_treatment=False).fit(y_sum,
                                                          X_final[:, d:],
                                                          X_final[:, :d_x],
                                                          X_final[:, d_x:d],
                                                          sample_weight=n_sum,
                                                          sample_var=var_sum,
                                                          inference=StatsModelsInference(cov_type='nonrobust'))
                        coef = est.coef_.reshape(p, q, d_x + 1)
                        lower, upper = est.coef__interval(alpha=.001)
                        lower = lower.reshape(p, q, d_x + 1)
                        upper = upper.reshape(p, q, d_x + 1)
                        for i in range(p):
                            for j in range(q):
                                assert np.abs(coef[i, j, 0] - 10 * i - j) < precision, \
                                    (coef[i, j, 0], 10 * i + j)
                                assert ((lower[i, j, 0] <= 10 * i + j + precision_int) &
                                        (upper[i, j, 0] >= 10 * i + j - precision_int)), \
                                    (lower[i, j, 0], upper[i, j, 0], 10 * i + j)
                                assert np.abs(coef[i, j, 1] - 1) < precision, (coef[i, j, 1], 1)
                                assert ((lower[i, j, 1] <= 1 + precision_int) &
                                        (upper[i, j, 1] >= 1 - precision_int)), \
                                    (lower[i, j, 1], upper[i, j, 1])
                                assert np.all(np.abs(coef[i, j, 2:]) < precision)
                                assert np.all((lower[i, j, 2:] <= precision_int) &
                                              (upper[i, j, 2:] >= -precision_int)), \
                                    (np.max(lower[i, j, 2:]), np.min(upper[i, j, 2:]))
def run_all_mc(first_stage, folder, n_list, n_exp, hetero_coef_list, d_list, d_x_list,
               p_list, t_list, cov_type_list, alpha_list):
    if not os.path.exists("results"):
        os.makedirs('results')
    results_filename = os.path.join("results", "{}.txt".format(folder))

    np.random.seed(123)
    coverage_est = {}
    coverage_lr = {}
    n_tests = 0
    n_failed_coef = 0
    n_failed_effect = 0
    cov_tol = .04
    for n in n_list:
        for hetero_coef in hetero_coef_list:
            for d in d_list:
                for d_x in d_x_list:
                    if d_x > d:
                        continue
                    for p in p_list:
                        for d_t in t_list:
                            X_test = np.unique(np.random.binomial(1, .5, size=(20, d_x)), axis=0)
                            t0 = time.time()
                            for it in range(n_exp):
                                X = np.random.binomial(1, .8, size=(n, d))
                                T = np.hstack([np.random.binomial(1, .5 * X[:, 0] + .25,
                                                                  size=(n,)).reshape(-1, 1)
                                               for _ in range(d_t)])
                                true_coef = np.hstack([np.hstack([it + np.arange(p).reshape(-1, 1),
                                                                  it + np.ones((p, 1)),
                                                                  np.zeros((p, d_x - 1))])
                                                       for it in range(d_t)])

                                def true_effect(x, t):
                                    return cross_product(
                                        np.hstack([np.ones((x.shape[0], 1)), x[:, :d_x]]), t) @ true_coef.T

                                y = true_effect(X, T) + X[:, [0] * p] + \
                                    (hetero_coef * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))

                                XT = np.hstack([X, T])
                                X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec, \
                                    n_sum_first, n_sum_sec, var_first, var_sec = _summarize(XT, y)
                                X = np.vstack([X1, X2])
                                y = np.concatenate((y1, y2))
                                X_final = np.vstack([X_final_first, X_final_sec])
                                y_sum = np.concatenate((y_sum_first, y_sum_sec))
                                n_sum = np.concatenate((n_sum_first, n_sum_sec))
                                var_sum = np.concatenate((var_first, var_sec))
                                first_half_sum = len(y_sum_first)
                                first_half = len(y1)
                                for cov_type in cov_type_list:

                                    class SplitterSum:
                                        def __init__(self):
                                            return

                                        def split(self, X, T):
                                            return [(np.arange(0, first_half_sum),
                                                     np.arange(first_half_sum, X.shape[0])),
                                                    (np.arange(first_half_sum, X.shape[0]),
                                                     np.arange(0, first_half_sum))]

                                    est = LinearDMLCateEstimator(model_y=first_stage(),
                                                                 model_t=first_stage(),
                                                                 n_splits=SplitterSum(),
                                                                 linear_first_stages=False,
                                                                 discrete_treatment=False)
                                    est.fit(y_sum, X_final[:, -d_t:], X_final[:, :d_x], X_final[:, d_x:-d_t],
                                            sample_weight=n_sum, sample_var=var_sum,
                                            inference=StatsModelsInference(cov_type=cov_type))

                                    class Splitter:
                                        def __init__(self):
                                            return

                                        def split(self, X, T):
                                            return [(np.arange(0, first_half),
                                                     np.arange(first_half, X.shape[0])),
                                                    (np.arange(first_half, X.shape[0]),
                                                     np.arange(0, first_half))]

                                    lr = LinearDMLCateEstimator(model_y=first_stage(),
                                                                model_t=first_stage(),
                                                                n_splits=Splitter(),
                                                                linear_first_stages=False,
                                                                discrete_treatment=False)
                                    lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
                                           inference=StatsModelsInference(cov_type=cov_type))

                                    for alpha in alpha_list:
                                        key = ("n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
                                               "{}_p_{}_d_t_{}_cov_type_{}_alpha_{}").format(
                                            n, n_exp, hetero_coef, d, d_x, p, d_t, cov_type, alpha)
                                        _append_coverage(key, coverage_est, est, X_test,
                                                         alpha, true_coef, true_effect)
                                        _append_coverage(key, coverage_lr, lr, X_test,
                                                         alpha, true_coef, true_effect)
                                        if it == n_exp - 1:
                                            n_tests += 1
                                            mean_coef_cov = np.mean(coverage_est[key]['coef_cov'])
                                            mean_eff_cov = np.mean(coverage_est[key]['effect_cov'])
                                            mean_coef_cov_lr = np.mean(coverage_lr[key]['coef_cov'])
                                            mean_eff_cov_lr = np.mean(coverage_lr[key]['effect_cov'])
                                            [print("{}. Time: {:.2f}, Mean Coef Cov: ({:.4f}, {:.4f}), "
                                                   "Mean Effect Cov: ({:.4f}, {:.4f})".format(
                                                       key, time.time() - t0,
                                                       mean_coef_cov, mean_coef_cov_lr,
                                                       mean_eff_cov, mean_eff_cov_lr),
                                                   file=f)
                                             for f in [None, open(results_filename, "a")]]
                                            coef_cov_dev = mean_coef_cov - (1 - alpha)
                                            if np.abs(coef_cov_dev) >= cov_tol:
                                                n_failed_coef += 1
                                                [print("BAD coef coverage on "
                                                       "average: deviation = {:.4f}".format(coef_cov_dev),
                                                       file=f)
                                                 for f in [None, open(results_filename, "a")]]
                                            eff_cov_dev = mean_eff_cov - (1 - alpha)
                                            if np.abs(eff_cov_dev) >= cov_tol:
                                                n_failed_effect += 1
                                                [print("BAD effect coverage on "
                                                       "average: deviation = {:.4f}".format(eff_cov_dev),
                                                       file=f)
                                                 for f in [None, open(results_filename, "a")]]

    [print("Finished {} Monte Carlo Tests. Failed Coef Coverage Tests: {}/{}. "
           "Failed Effect Coverage Tests: {}/{}. (Coverage Tolerance={})".format(
               n_tests, n_failed_coef, n_tests, n_failed_effect, n_tests, cov_tol),
           file=f)
     for f in [None, open(results_filename, "a")]]

    agg_coverage_est, std_coverage_est, q_coverage_est = _agg_coverage(coverage_est)
    agg_coverage_lr, std_coverage_lr, q_coverage_lr = _agg_coverage(coverage_lr)

    [print("\nResults for: {}\n--------------------------\n".format(folder), file=f)
     for f in [None, open(results_filename, "a")]]

    plot_coverage(agg_coverage_est, 'coef_cov', n, n_exp, hetero_coef_list, d_list, d_x_list,
                  p_list, t_list, cov_type_list, alpha_list, prefix="sum_", folder=folder)
    plot_coverage(agg_coverage_lr, 'coef_cov', n, n_exp, hetero_coef_list, d_list, d_x_list,
                  p_list, t_list, cov_type_list, alpha_list, prefix="orig_", folder=folder)
    plot_coverage(agg_coverage_est, 'effect_cov', n, n_exp, hetero_coef_list, d_list, d_x_list,
                  p_list, t_list, cov_type_list, alpha_list, prefix="sum_", folder=folder)
    plot_coverage(agg_coverage_lr, 'effect_cov', n, n_exp, hetero_coef_list, d_list, d_x_list,
                  p_list, t_list, cov_type_list, alpha_list, prefix="orig_", folder=folder)

    [print("Summarized Data\n----------------", file=f)
     for f in [None, open(results_filename, "a")]]
    print_aggregate(agg_coverage_est, std_coverage_est, q_coverage_est)
    print_aggregate(agg_coverage_est, std_coverage_est, q_coverage_est, lambda: open(results_filename, "a"))

    [print("\nUn-Summarized Data\n-----------------", file=f)
     for f in [None, open(results_filename, "a")]]
    print_aggregate(agg_coverage_lr, std_coverage_lr, q_coverage_lr)
    print_aggregate(agg_coverage_lr, std_coverage_lr, q_coverage_lr, lambda: open(results_filename, "a"))
def test_random_cate_settings(self):
    """Verify that we can call methods on the CATE interpreter with various combinations of inputs."""
    n = 100
    for _ in range(100):
        t_shape = (n,) if self.coinflip() else (n, 1)
        y_shape = (n,) if self.coinflip() else (n, 1)
        discrete_t = self.coinflip()
        X = np.random.normal(size=(n, 4))
        X2 = np.random.normal(size=(10, 4))
        T = np.random.binomial(1, 0.5, size=t_shape) if discrete_t else np.random.normal(size=t_shape)
        Y = np.random.normal(size=y_shape)
        est = LinearDMLCateEstimator(discrete_treatment=discrete_t)

        fit_kwargs = {}
        cate_init_kwargs = {}
        policy_init_kwargs = {}
        intrp_kwargs = {}
        policy_intrp_kwargs = {}
        common_kwargs = {}
        plot_kwargs = {}
        render_kwargs = {}
        export_kwargs = {}

        if self.coinflip():
            fit_kwargs.update(inference='statsmodels')
            cate_init_kwargs.update(include_model_uncertainty=True)
            policy_init_kwargs.update(risk_level=0.1)
        if self.coinflip():
            cate_init_kwargs.update(uncertainty_level=0.01)
        if self.coinflip():
            policy_init_kwargs.update(risk_seeking=True)
        if self.coinflip():
            policy_intrp_kwargs.update(treatment_names=['control gp', 'treated gp'])
        if self.coinflip(1 / 3):
            policy_intrp_kwargs.update(sample_treatment_costs=0.1)
        elif self.coinflip():
            policy_intrp_kwargs.update(sample_treatment_costs=np.random.normal(size=(10,)))
        if self.coinflip():
            common_kwargs.update(feature_names=['A', 'B', 'C', 'D'])
        if self.coinflip():
            common_kwargs.update(filled=False)
        if self.coinflip():
            common_kwargs.update(rounded=False)
        if self.coinflip():
            common_kwargs.update(precision=1)
        if self.coinflip():
            render_kwargs.update(rotate=True)
            export_kwargs.update(rotate=True)
        if self.coinflip():
            render_kwargs.update(leaves_parallel=False)
            export_kwargs.update(leaves_parallel=False)
        if self.coinflip():
            render_kwargs.update(format='png')
        if self.coinflip():
            export_kwargs.update(out_file='out')
        if self.coinflip(0.95):  # don't launch files most of the time
            render_kwargs.update(view=False)

        with self.subTest(t_shape=t_shape, y_shape=y_shape, discrete_t=discrete_t,
                          fit_kwargs=fit_kwargs,
                          cate_init_kwargs=cate_init_kwargs,
                          policy_init_kwargs=policy_init_kwargs,
                          policy_intrp_kwargs=policy_intrp_kwargs,
                          intrp_kwargs=intrp_kwargs,
                          common_kwargs=common_kwargs,
                          plot_kwargs=plot_kwargs,
                          render_kwargs=render_kwargs,
                          export_kwargs=export_kwargs):
            plot_kwargs.update(common_kwargs)
            render_kwargs.update(common_kwargs)
            export_kwargs.update(common_kwargs)
            policy_intrp_kwargs.update(intrp_kwargs)

            est.fit(Y, T, X, **fit_kwargs)

            intrp = SingleTreeCateInterpreter(**cate_init_kwargs)
            intrp.interpret(est, X2, **intrp_kwargs)
            intrp.plot(**plot_kwargs)
            intrp.render('outfile', **render_kwargs)
            intrp.export_graphviz(**export_kwargs)

            intrp = SingleTreePolicyInterpreter(**policy_init_kwargs)
            try:
                intrp.interpret(est, X2, **policy_intrp_kwargs)
                intrp.plot(**plot_kwargs)
                intrp.render('outfile', **render_kwargs)
                intrp.export_graphviz(**export_kwargs)
            except AttributeError as e:
                assert str(e).find("samples should") >= 0
def test_dominicks():
    file_name = "oj_large.csv"
    if not os.path.isfile(file_name):
        print("Downloading file (this might take a few seconds)...")
        urllib.request.urlretrieve(
            "https://msalicedatapublic.blob.core.windows.net/datasets/OrangeJuice/oj_large.csv",
            file_name)
    oj_data = pd.read_csv(file_name)

    brands = sorted(set(oj_data["brand"]))
    stores = sorted(set(oj_data["store"]))

    featnames = ["week", "feat"] + list(oj_data.columns[6:])

    # Preprocess data
    import datetime
    import numpy as np

    # Convert 'week' to a date
    # week_zero = datetime.datetime.strptime("09/07/89", "%m/%d/%y")
    # oj_data["week"] = pd.to_timedelta(oj_data["week"], unit='w') + week_zero

    # Take log of price
    oj_data["logprice"] = np.log(oj_data["price"])
    oj_data.drop("price", axis=1, inplace=True)

    # Make brand numeric
    oj_data["brand"] = [brands.index(b) for b in oj_data["brand"]]

    class PriceFeaturizer(TransformerMixin):
        def __init__(self, n_prods, own_price=True,
                     cross_price_groups=False, cross_price_indiv=True, per_product_effects=True):
            base_arrays = []
            effect_names = []
            one_hots = [(0,) * p + (1,) + (0,) * (n_prods - p - 1) for p in range(n_prods)]
            if own_price:
                base_arrays.append(np.eye(n_prods))
                effect_names.append("own price")
            if cross_price_groups:
                base_arrays.append((np.ones((n_prods, n_prods)) - np.eye(n_prods)) / (n_prods - 1))
                effect_names.append("group cross price")
            if cross_price_indiv:
                for p in range(n_prods):
                    base_arrays.append(one_hots[p] * np.ones((n_prods, 1)) - np.diag(one_hots[p]))
                    effect_names.append("cross price effect {} ->".format(p))
            if per_product_effects:
                all = [(np.diag(one_hots[p]) @ arr, nm + " {}".format(p))
                       for arr, nm in zip(base_arrays, effect_names) for p in range(n_prods)]
                # remove meaningless features (e.g. cross-price effects of products on themselves),
                # which have all zero coeffs
                nonempty = [(arr, nm) for arr, nm in all if np.count_nonzero(arr) > 0]
                self._features = [arr for arr, _ in nonempty]
                self._names = [nm for _, nm in nonempty]
            else:
                self._features = base_arrays
                self._names = effect_names

        def fit(self, X):
            self._is_fitted = True
            assert shape(X)[1] == 0
            return self

        def transform(self, X):
            assert self._is_fitted
            assert shape(X)[1] == 0
            return np.tile(self._features, (shape(X)[0], 1, 1, 1))

        @property
        def names(self):
            return self._names

    for name, op, xp_g, xp_i, pp in [("Homogeneous treatment effect", True, False, False, False),
                                     ("Heterogeneous treatment effects", True, False, False, True),
                                     (("Heterogeneous treatment effects"
                                       " with group effects"), True, True, False, True),
                                     (("Heterogeneous treatment effects"
                                       " with cross price effects"), True, False, True, True)]:

        print(name)
        np.random.seed(42)

        ft = PriceFeaturizer(n_prods=3, own_price=op, cross_price_groups=xp_g,
                             cross_price_indiv=xp_i, per_product_effects=pp)
        names = ft.names
        dml = LinearDMLCateEstimator(model_y=RandomForestRegressor(),
                                     model_t=RandomForestRegressor(),
                                     featurizer=ft,
                                     n_splits=2)

        effects = []
        for store in stores:
            data = oj_data[oj_data['store'] == store].sort_values(by=['week', 'brand'])
            dml.fit(T=reshape(data.as_matrix(["logprice"]), (-1, 3)),
                    Y=reshape(data.as_matrix(["logmove"]), (-1, 3)),
                    W=reshape(data.as_matrix(featnames), (-1, 3 * len(featnames))))
            effects.append(dml.coef_)
        effects = np.array(effects)
        for nm, eff in zip(names, effects.T):
            print(" Effect: {}".format(nm))
            print(" Mean: {}".format(np.mean(eff)))
            print(" Std.: {}".format(np.std(eff)))

    class ConstFt(TransformerMixin):
        def fit(self, X):
            return self

        def transform(self, X):
            return np.ones((shape(X)[0], 1))

    print("Vanilla HTE+XP")

    np.random.seed(42)
    dml = LinearDMLCateEstimator(model_y=RandomForestRegressor(),
                                 model_t=RandomForestRegressor(),
                                 featurizer=ConstFt(),
                                 n_splits=2)

    effects = []
    for store in stores:
        data = oj_data[oj_data['store'] == store].sort_values(by=['week', 'brand'])
        dml.fit(T=reshape(data.as_matrix(["logprice"]), (-1, 3)),
                Y=reshape(data.as_matrix(["logmove"]), (-1, 3)),
                W=reshape(data.as_matrix(featnames), (-1, 3 * len(featnames))))
        effects.append(dml.coef_)
    effects = np.array(effects)

    names = ["{} on {}".format(i, j) for j in range(3) for i in range(3)]
    for nm, eff in zip(names, reshape(effects, (-1, 9)).T):
        print(" Effect: {}".format(nm))
        print(" Mean: {}".format(np.mean(eff)))
        print(" Std.: {}".format(np.std(eff)))
def test_all_kinds(self):
    T = [1, 0, 1, 2, 0, 2] * 5
    Y = [1, 2, 3, 4, 5, 6] * 5
    X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
    est = LinearDMLCateEstimator(n_splits=2)
    for kind in ['percentile', 'pivot', 'normal']:
        with self.subTest(kind=kind):
            inference = BootstrapInference(n_bootstrap_samples=5, bootstrap_type=kind)
            est.fit(Y, T, inference=inference)
            i = est.const_marginal_effect_interval()
            inf = est.const_marginal_effect_inference()
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])

            est.fit(Y, T, X=X, inference=inference)
            i = est.const_marginal_effect_interval(X)
            inf = est.const_marginal_effect_inference(X)
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])

            i = est.coef__interval()
            inf = est.coef__inference()
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])

            i = est.effect_interval(X)
            inf = est.effect_inference(X)
            assert i[0].shape == i[1].shape == inf.point_estimate.shape
            assert np.allclose(i[0], inf.conf_int()[0])
            assert np.allclose(i[1], inf.conf_int()[1])
def test_stratify(self):
    """Test that we can properly stratify by treatment"""
    T = [1, 0, 1, 2, 0, 2]
    Y = [1, 2, 3, 4, 5, 6]
    X = np.array([1, 1, 2, 2, 1, 2]).reshape(-1, 1)
    est = LinearDMLCateEstimator(model_y=LinearRegression(),
                                 model_t=LogisticRegression(),
                                 discrete_treatment=True)
    inference = BootstrapInference(n_bootstrap_samples=5)
    est.fit(Y, T, inference=inference)
    est.const_marginal_effect_interval()

    est.fit(Y, T, X=X, inference=inference)
    est.const_marginal_effect_interval(X)

    est.fit(Y, np.asarray(T).reshape(-1, 1), inference=inference)  # test stratifying 2D treatment
    est.const_marginal_effect_interval()
def test_summary(self):
    """Tests the inference results summary for continuous treatment estimators."""
    # Test inference results when `cate_feature_names` does not exist
    for inference in [BootstrapInference(n_bootstrap_samples=5), 'statsmodels']:
        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names()
        np.testing.assert_array_equal(coef_rows, fnames)
        intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
        np.testing.assert_array_equal(intercept_rows, ['intercept'])

        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names(input_features=fnames)
        np.testing.assert_array_equal(coef_rows, fnames)

        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)

        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        summary_results = wrapped_est.summary()
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

        cate_est = LinearDMLCateEstimator(model_t=LinearRegression(),
                                          model_y=LinearRegression(),
                                          featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = wrapped_est.summary(feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)
def test_dml_sum_vs_original_rf(self):
    """
    Testing that the summarized version of DML gives the same results as the non-summarized
    when RandomForest is used for first stage models.
    """
    np.random.seed(123)

    def first_stage_model():
        return RandomForestRegressor(n_estimators=10, bootstrap=False, random_state=123)

    n = 1000
    for d in [1, 5]:
        for p in [1, 5]:
            for cov_type in ['nonrobust', 'HC0', 'HC1']:
                for alpha in [.01, .05, .2]:
                    X = np.random.binomial(1, .8, size=(n, d))
                    T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

                    def true_effect(x):
                        return np.hstack([x[:, [0]] + t for t in range(p)])

                    y = true_effect(X) * T.reshape(-1, 1) + X[:, [0] * p] + \
                        (1 * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))
                    if p == 1:
                        y = y.flatten()
                    X_test = np.random.binomial(1, .5, size=(100, d))

                    XT = np.hstack([X, T.reshape(-1, 1)])
                    (X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,
                     n_sum_first, n_sum_sec, var_first, var_sec) = _summarize(XT, y)
                    X = np.vstack([X1, X2])
                    y = np.concatenate((y1, y2))
                    X_final = np.vstack([X_final_first, X_final_sec])
                    y_sum = np.concatenate((y_sum_first, y_sum_sec))
                    n_sum = np.concatenate((n_sum_first, n_sum_sec))
                    var_sum = np.concatenate((var_first, var_sec))
                    first_half_sum = len(y_sum_first)
                    first_half = len(y1)

                    class SplitterSum:
                        def __init__(self):
                            return

                        def split(self, X, T):
                            return [(np.arange(0, first_half_sum),
                                     np.arange(first_half_sum, X.shape[0])),
                                    (np.arange(first_half_sum, X.shape[0]),
                                     np.arange(0, first_half_sum))]

                    est = LinearDMLCateEstimator(
                        model_y=first_stage_model(),
                        model_t=first_stage_model(),
                        n_splits=SplitterSum(),
                        linear_first_stages=False,
                        discrete_treatment=False).fit(y_sum,
                                                      X_final[:, -1],
                                                      X_final[:, :-1],
                                                      None,
                                                      sample_weight=n_sum,
                                                      sample_var=var_sum,
                                                      inference=StatsModelsInference(cov_type=cov_type))

                    class Splitter:
                        def __init__(self):
                            return

                        def split(self, X, T):
                            return [(np.arange(0, first_half),
                                     np.arange(first_half, X.shape[0])),
                                    (np.arange(first_half, X.shape[0]),
                                     np.arange(0, first_half))]

                    lr = LinearDMLCateEstimator(
                        model_y=first_stage_model(),
                        model_t=first_stage_model(),
                        n_splits=Splitter(),
                        linear_first_stages=False,
                        discrete_treatment=False).fit(y,
                                                      X[:, -1],
                                                      X[:, :-1],
                                                      None,
                                                      inference=StatsModelsInference(cov_type=cov_type))

                    _compare_dml_classes(est, lr, X_test, alpha=alpha)