def test_with_econml(self):
    """Check that an econml estimator can be wrapped by BootstrapEstimator.

    A LinearDMLCateEstimator is fitted directly and through
    ``BootstrapEstimator(est, 50)`` on the same random data.  The wrapper
    must expose ``coef_``, ``effect`` and the matching ``*_interval``
    accessors, each with the same shape as the base estimator's result.
    """
    n_rows = 1000
    features = np.random.normal(size=(n_rows, 2))
    treatment = np.random.normal(size=(n_rows, 1))
    alt_treatment = np.random.normal(size=(n_rows, 1))
    noise = np.random.normal(size=(n_rows, 1))
    outcome = features[:, 0:1] * 0.5 + treatment + noise

    est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
    est.fit(outcome, treatment, features)

    # The wrapper has to accept exactly the arguments the base estimator takes.
    bs = BootstrapEstimator(est, 50)
    bs.fit(outcome, treatment, features)

    # --- attribute access: coef_ ---------------------------------------
    # The bootstrapped attribute mirrors the shape of the original one.
    self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_))

    # Default interval: both bounds are shaped like the attribute itself.
    coef_lo, coef_hi = bs.coef__interval()
    self.assertEqual(np.shape(est.coef_), np.shape(coef_lo))
    self.assertEqual(np.shape(est.coef_), np.shape(coef_hi))

    # The bounds must be ordered and must not collapse onto each other.
    coef_ordered = coef_lo <= coef_hi
    coef_strict = coef_lo < coef_hi
    assert coef_ordered.all()
    assert coef_strict.any()

    # Same shape guarantee when explicit percentile bounds are supplied.
    coef_lo, coef_hi = bs.coef__interval(lower=10, upper=90)
    self.assertEqual(np.shape(est.coef_), np.shape(coef_lo))
    self.assertEqual(np.shape(est.coef_), np.shape(coef_hi))

    # --- method access: effect -----------------------------------------
    effect_kwargs = dict(T0=treatment, T1=alt_treatment)

    # A method result is forwarded with the same shape as an attribute would be.
    self.assertEqual(np.shape(est.effect(features, **effect_kwargs)),
                     np.shape(bs.effect(features, **effect_kwargs)))

    # Default interval around the method result.
    eff_lo, eff_hi = bs.effect_interval(features, **effect_kwargs)
    self.assertEqual(np.shape(est.effect(features, **effect_kwargs)), np.shape(eff_lo))
    self.assertEqual(np.shape(est.effect(features, **effect_kwargs)), np.shape(eff_hi))

    eff_ordered = eff_lo <= eff_hi
    eff_strict = eff_lo < eff_hi
    assert eff_ordered.all()
    assert eff_strict.any()

    # Interval around the method result with explicit percentile bounds.
    eff_lo, eff_hi = bs.effect_interval(features, **effect_kwargs, lower=10, upper=90)
    self.assertEqual(np.shape(est.effect(features, **effect_kwargs)), np.shape(eff_lo))
    self.assertEqual(np.shape(est.effect(features, **effect_kwargs)), np.shape(eff_hi))

    eff_ordered = eff_lo <= eff_hi
    eff_strict = eff_lo < eff_hi
    assert eff_ordered.all()
    assert eff_strict.any()
def test_can_use_statsmodel_inference(self):
    """Check that ``inference='statsmodels'`` yields usable confidence intervals.

    After fitting a discrete-treatment LinearDMLCateEstimator, every
    ``*_interval`` accessor must return a (lower, upper) pair that brackets
    the corresponding point estimate and is not degenerate everywhere.
    """

    def check_interval(interval, point, same_shape=True):
        # An interval is a (lower, upper) pair that brackets the point estimate.
        assert len(interval) == 2
        lo, hi = interval
        if same_shape:
            assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

    fit_first = np.array([2, 3, 1, 3, 2, 1, 1, 1])
    fit_second = np.array([3, 2, 1, 2, 3, 1, 1, 1])
    fit_third = np.ones((8, 1))

    dml = LinearDMLCateEstimator(LinearRegression(),
                                 LogisticRegression(C=1000),
                                 discrete_treatment=True)
    dml.fit(fit_first, fit_second, fit_third, inference='statsmodels')

    # Every ordered pair of the three treatment levels, one row per pair.
    query = np.ones((9, 1))
    base_levels = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
    target_levels = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])

    # effect / effect_interval
    interval = dml.effect_interval(query, T0=base_levels, T1=target_levels, alpha=0.05)
    point = dml.effect(query, T0=base_levels, T1=target_levels)
    check_interval(interval, point)

    # const_marginal_effect / const_marginal_effect_interval
    interval = dml.const_marginal_effect_interval(query, alpha=0.05)
    point = dml.const_marginal_effect(query)
    check_interval(interval, point)

    # coef_ / coef__interval
    interval = dml.coef__interval(alpha=0.05)
    point = dml.coef_
    check_interval(interval, point)

    # intercept_ / intercept__interval (no shape comparison for this one)
    interval = dml.intercept__interval(alpha=0.05)
    point = dml.intercept_
    check_interval(interval, point, same_shape=False)
def test_discrete_treatments(self):
    """Check that discrete treatments are handled by the DML estimator.

    The artificial data is built so that the effect of moving from
    treatment 1 -> 2 is 2 and from 1 -> 3 is 1, which forces 2 -> 3 to
    be -1 by composition.  Class sizes are uneven and the treatments are
    not in lexicographic order, which should rule out some basic issues.
    """
    fit_first = np.array([2, 3, 1, 3, 2, 1, 1, 1])
    fit_second = np.array([3, 2, 1, 2, 3, 1, 1, 1])
    fit_third = np.ones((8, 1))

    dml = LinearDMLCateEstimator(LinearRegression(),
                                 LogisticRegression(C=1000),
                                 featurizer=FunctionTransformer(),
                                 discrete_treatment=True)
    dml.fit(fit_first, fit_second, fit_third)

    # Evaluate every ordered pair of treatment levels (from, to).
    query = np.ones((9, 1))
    from_levels = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
    to_levels = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
    expected_effects = [0, 2, 1, -2, 0, -1, -1, 1, 0]

    estimated_effects = dml.effect(query, from_levels, to_levels)
    np.testing.assert_almost_equal(estimated_effects, expected_effects)

    # Scoring on the training data only needs to run without raising.
    dml.score(fit_first, fit_second, fit_third)
def test_internal(self):
    """Check bootstrap inference when requested through the estimator's own ``fit``.

    Fits a LinearDMLCateEstimator with ``inference='bootstrap'`` on random
    data and verifies that ``effect_interval`` returns ordered,
    non-degenerate bounds shaped like the point estimate, and that the
    point estimate mostly falls inside them.
    """
    n_rows = 1000
    features = np.random.normal(size=(n_rows, 2))
    treatment = np.random.normal(size=(n_rows, 1))
    alt_treatment = np.random.normal(size=(n_rows, 1))
    noise = np.random.normal(size=(n_rows, 1))
    outcome = features[:, 0:1] * 0.5 + treatment + noise

    est = LinearDMLCateEstimator(LinearRegression(), LinearRegression())
    est.fit(outcome, treatment, features, inference='bootstrap')

    eff = est.effect(features, T0=treatment, T1=alt_treatment)
    expected_shape = np.shape(eff)

    # Default interval: both bounds are shaped like the point estimate.
    lo, hi = est.effect_interval(features, T0=treatment, T1=alt_treatment)
    self.assertEqual(expected_shape, np.shape(lo))
    self.assertEqual(expected_shape, np.shape(hi))

    # The bounds must be ordered and must differ somewhere.
    ordered = lo <= hi
    strictly_apart = lo < hi
    assert ordered.all()
    assert strictly_apart.any()

    # The estimated effect should usually sit within the bounds.
    inside = np.logical_and(lo <= eff, eff <= hi)
    assert np.mean(inside) >= 0.9

    # Repeat the same checks with alpha provided explicitly.
    lo, hi = est.effect_interval(features, T0=treatment, T1=alt_treatment, alpha=0.2)
    self.assertEqual(expected_shape, np.shape(lo))
    self.assertEqual(expected_shape, np.shape(hi))

    ordered = lo <= hi
    strictly_apart = lo < hi
    assert ordered.all()
    assert strictly_apart.any()

    # With the explicit alpha, the required coverage is lower.
    inside = np.logical_and(lo <= eff, eff <= hi)
    assert np.mean(inside) >= 0.8