def test_drlearner_clipping(self):
    """Check that a default DRLearner yields no NaN effect with one extreme sample.

    A binary treatment is drawn with probability equal to the single
    feature, the outcome is pure standard-normal noise, and the first
    feature value is then overwritten with a far-away outlier before
    fitting.  The test only requires the estimated effect at ``0.5`` to be
    free of NaNs.
    """
    n_samples = 200
    X = np.linspace(0, 1, n_samples).reshape(-1, 1)
    T = np.random.binomial(1, X)
    Y = np.random.normal(size=T.shape)

    # One split will only contain X values between 0 and 1, so the
    # propensity predicted for this outlier will be extremely low.
    X[0] = -1000

    est = DRLearner()
    est.fit(Y, T, X)

    query_point = np.array([[0.5]])
    cate = est.const_marginal_effect(query_point)
    assert not np.isnan(cate).any()
def test_auto_inference(self):
    """Smoke-test the inference results produced without an explicit ``inference`` argument.

    Four estimators are fitted on the shared ``TestInference`` data:

    * a ``DRLearner`` with a ``StatsModelsLinearRegression`` final model,
      whose inference summaries must be computable;
    * a ``DRLearner`` with a plain ``LinearRegression`` final model and
      ``multitask_model_final=True``, for which ``effect_inference`` is
      expected to raise ``AttributeError``;
    * a ``DML`` and a ``NonParamDML``, whose inference results must also
      expose a non-``None`` ``stderr``.
    """
    Y = TestInference.Y
    T = TestInference.T
    X = TestInference.X
    W = TestInference.W

    def summarize_and_check_stderr(get_inference):
        # The inference object is deliberately built twice (two separate
        # calls): once to render the summary frame, once to read stderr.
        get_inference().summary_frame()
        assert get_inference().stderr is not None

    def check_effect_inferences(fitted):
        # Effect, constant marginal effect and marginal effect inference,
        # in that order, with a population summary after the first one.
        summarize_and_check_stderr(lambda: fitted.effect_inference(X))
        fitted.effect_inference(X).population_summary()
        summarize_and_check_stderr(lambda: fitted.const_marginal_effect_inference(X))
        summarize_and_check_stderr(lambda: fitted.marginal_effect_inference(T, X))

    # DRLearner with a statsmodels-style final model: summaries only.
    dr_est = DRLearner(model_regression=LinearRegression(),
                       model_propensity=LogisticRegression(),
                       model_final=StatsModelsLinearRegression())
    dr_est.fit(Y, T, X=X, W=W)
    dr_est.effect_inference(X).summary_frame()
    dr_est.effect_inference(X).population_summary()
    dr_est.const_marginal_effect_inference(X).summary_frame()
    dr_est.marginal_effect_inference(T, X).summary_frame()

    # DRLearner with a plain linear multitask final model: inference must fail.
    multitask_est = DRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(),
                              model_final=LinearRegression(),
                              multitask_model_final=True)
    multitask_est.fit(Y, T, X=X, W=W)
    with pytest.raises(AttributeError):
        multitask_est.effect_inference(X)

    # DML: coefficient / intercept inference in addition to effect inference.
    dml_est = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  random_state=123)
    dml_est.fit(Y, T, X=X, W=W)
    dml_est.summary()
    summarize_and_check_stderr(lambda: dml_est.coef__inference())
    summarize_and_check_stderr(lambda: dml_est.intercept__inference())
    check_effect_inferences(dml_est)

    # NonParamDML with a DebiasedLasso final model: effect inference only.
    nonparam_est = NonParamDML(model_y=LinearRegression(),
                               model_t=LinearRegression(),
                               model_final=DebiasedLasso(),
                               random_state=123)
    nonparam_est.fit(Y, T, X=X, W=W)
    check_effect_inferences(nonparam_est)
def test_drlearner_all_attributes(self):
    """Fit DRLearner over a grid of configurations and check effects and fitted attributes.

    Synthetic data: three uniform(-1, 1) controls, a treatment drawn from
    Binomial(2, expit(controls[:, 0])) (so it takes values 0, 1 or 2), and
    an outcome ``(1 + .5 * x0) * T + x0 + noise`` with noise std 0.01.
    The per-unit treatment effect is therefore ``1 + .5 * x0``, which is
    what the effect / coefficient assertions below compare against with an
    absolute tolerance of 0.15.

    The grid covers W (absent / present), sample weights, sample variances,
    an optional polynomial featurizer, three model triples
    (propensity, regression, final) and ``multitask_model_final``.  Each
    combination runs in its own ``subTest``.
    """
    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.utilities import StatsModelsLinearRegression
    import scipy.special
    # Fixed seed so the generated data (and the random weights below) are reproducible.
    np.random.seed(123)
    controls = np.random.uniform(-1, 1, size=(5000, 3))
    T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
    sigma = 0.01
    y = (1 + .5 * controls[:, 0]) * T + controls[:, 0] + np.random.normal(0, sigma, size=(5000,))
    # NOTE: every loop iterable below is a list literal that is rebuilt each
    # time its `for` statement is entered, so the random weight/variance
    # vectors are re-drawn and fresh model instances are created on every
    # pass of the enclosing loops.  Flattening these loops would change that.
    for X in [controls]:
        for W in [None, controls]:
            for sample_weight in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                for sample_var in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                    for featurizer in [None, PolynomialFeatures(degree=2, include_bias=False)]:
                        # NOTE(review): the first two model triples are identical, so the
                        # gradient-boosting / random-forest configuration runs twice.
                        for models in [(GradientBoostingClassifier(), GradientBoostingRegressor(),
                                        RandomForestRegressor(n_estimators=100,
                                                              max_depth=5, min_samples_leaf=50)),
                                       (GradientBoostingClassifier(), GradientBoostingRegressor(),
                                        RandomForestRegressor(n_estimators=100,
                                                              max_depth=5, min_samples_leaf=50)),
                                       (LogisticRegression(solver='lbfgs', multi_class='auto'),
                                        LinearRegression(), StatsModelsLinearRegression())]:
                            for multitask_model_final in [False, True]:
                                # NOTE(review): `models` is a tuple, so this isinstance check is
                                # always False and the condition reduces to
                                # `sample_var is not None`: every combination that supplies a
                                # sample_var is skipped, including the StatsModelsLinearRegression
                                # one.  Presumably `models[2]` was intended -- confirm, and check
                                # that the newly enabled cases pass, before changing it.
                                if (not isinstance(models, StatsModelsLinearRegression))\
                                        and (sample_var is not None):
                                    continue
                                with self.subTest(X=X, W=W, sample_weight=sample_weight, sample_var=sample_var,
                                                  featurizer=featurizer, models=models,
                                                  multitask_model_final=multitask_model_final):
                                    est = DRLearner(model_propensity=models[0],
                                                    model_regression=models[1],
                                                    model_final=models[2],
                                                    featurizer=featurizer,
                                                    multitask_model_final=multitask_model_final)
                                    # NOTE(review): X only ever takes the value `controls` in the
                                    # loop above, so this branch cannot run as written; `e_info`
                                    # is also never read.
                                    if (X is None) and (W is None):
                                        with pytest.raises(AttributeError) as e_info:
                                            est.fit(y, T, X=X, W=W, sample_weight=sample_weight,
                                                    sample_var=sample_var)
                                        continue
                                    est.fit(
                                        y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var)
                                    # Effect of moving T from 0 to 1 is 1 + .5 * x0 by construction.
                                    np.testing.assert_allclose(est.effect(X[:3], T0=0, T1=1), 1 + .5 * X[:3, 0],
                                                               rtol=0, atol=.15)
                                    # Two columns: effect of T=1 and of T=2, each relative to T=0.
                                    np.testing.assert_allclose(est.const_marginal_effect(X[:3]),
                                                               np.hstack(
                                                                   [1 + .5 * X[:3, [0]],
                                                                    2 * (1 + .5 * X[:3, [0]])]),
                                                               rtol=0, atol=.15)
                                    # The expected marginal effect is the same for either value of t.
                                    for t in [1, 2]:
                                        np.testing.assert_allclose(est.marginal_effect(t, X[:3]),
                                                                   np.hstack([1 + .5 * X[:3, [0]],
                                                                              2 * (1 + .5 * X[:3, [0]])]),
                                                                   rtol=0, atol=.15)
                                    assert isinstance(est.score_, float)
                                    assert isinstance(
                                        est.score(y, T, X=X, W=W), float)

                                    # Feature names pass through unchanged unless a featurizer
                                    # expands them.
                                    feat_names = ['A', 'B', 'C']
                                    out_feat_names = feat_names
                                    if featurizer is not None:
                                        out_feat_names = featurizer.fit(
                                            X).get_feature_names(feat_names)
                                        np.testing.assert_array_equal(
                                            est.featurizer.n_input_features_, 3)
                                    np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                  out_feat_names)

                                    # First-stage models: the leading 2 in each expected shape is
                                    # the number of fitted models per nuisance (presumably one per
                                    # cross-fitting fold -- TODO confirm).  Regression models see
                                    # 2 extra input columns beyond X and W (presumably the encoded
                                    # treatment -- TODO confirm).
                                    if isinstance(models[0], GradientBoostingClassifier):
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_regression]).shape,
                                            [2, 2 + X.shape[1] + (W.shape[1] if W is not None else 0)])
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_propensity]).shape,
                                            [2, X.shape[1] + (W.shape[1] if W is not None else 0)])
                                    else:
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_
                                                      for mdl in est.models_regression]).shape,
                                            [2, 2 + X.shape[1] + (W.shape[1] if W is not None else 0)])
                                        # Extra axis of size 3: one coefficient row per treatment level.
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_
                                                      for mdl in est.models_propensity]).shape,
                                            [2, 3, X.shape[1] + (W.shape[1] if W is not None else 0)])

                                    # Final-stage model(s): a single multitask model, or one
                                    # model per non-control treatment level.
                                    if multitask_model_final:
                                        if isinstance(models[2], RandomForestRegressor):
                                            # The first feature (x0) must be the most important one.
                                            np.testing.assert_equal(np.argsort(
                                                est.multitask_model_cate.feature_importances_)[-1], 0)
                                        else:
                                            # Only the first feature carries signal: slopes .5 and 1,
                                            # intercepts 1 and 2 for T=1 and T=2 respectively.
                                            true_coef = np.zeros(
                                                (2, len(out_feat_names)))
                                            true_coef[:, 0] = [.5, 1]
                                            np.testing.assert_allclose(
                                                est.multitask_model_cate.coef_, true_coef, rtol=0, atol=.15)
                                            np.testing.assert_allclose(
                                                est.multitask_model_cate.intercept_, [1, 2], rtol=0, atol=.15)
                                    else:
                                        for t in [1, 2]:
                                            if isinstance(models[2], RandomForestRegressor):
                                                np.testing.assert_equal(np.argsort(
                                                    est.model_cate(T=t).feature_importances_)[-1], 0)
                                            else:
                                                # Effect of level t versus control is t * (1 + .5 * x0).
                                                true_coef = np.zeros(
                                                    len(out_feat_names))
                                                true_coef[0] = .5 * t
                                                np.testing.assert_allclose(
                                                    est.model_cate(T=t).coef_, true_coef, rtol=0, atol=.15)
                                                np.testing.assert_allclose(
                                                    est.model_cate(T=t).intercept_, t, rtol=0, atol=.15)