def test_nonparam_dml(self):
    """Check that NonParamDML attributes can be reset and the final stage refit."""
    y, T, X, W = self._get_data()
    est = NonParamDML(model_y=LinearRegression(), model_t=LinearRegression(),
                      model_final=WeightedLasso(), random_state=123)
    est.fit(y, T, X=X, W=W)
    # refit_final requires a fit with cache_values=True first.
    with pytest.raises(Exception):
        est.refit_final()
    est.fit(y, T, X=X, W=W, cache_values=True)

    # Swap the final model and refit without redoing the first stages.
    est.model_final = DebiasedLasso(fit_intercept=False)
    est.refit_final()
    assert isinstance(est.model_cate, DebiasedLasso)
    est.effect_interval(X[:1])

    # Swap the featurizer and refit; the fitted featurizer_ must update.
    est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    est.refit_final()
    assert isinstance(est.featurizer_, PolynomialFeatures)
    est.effect_interval(X[:1])

    # Mutate several settings, then verify a full refit matches a fresh
    # estimator constructed with the same configuration.
    est.discrete_treatment = True
    est.featurizer = None
    est.linear_first_stages = True
    est.model_t = LogisticRegression()
    est.model_final = DebiasedLasso()
    est.fit(y, T, X=X, W=W)
    replica = NonParamDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                          model_final=DebiasedLasso(), discrete_treatment=True,
                          random_state=123).fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(est.effect(X[:1]), replica.effect(X[:1]))
    np.testing.assert_array_equal(est.effect_interval(X[:1])[0], replica.effect_interval(X[:1])[0])
def _check_debiased_coefs(self, X, y, sample_weight, expected_coefs, expected_intercept=0, params=None):
    """Fit a debiased lasso on (X, y) and check its estimates against expectations.

    Uses MultiOutputDebiasedLasso for 2-D targets and DebiasedLasso otherwise.
    Coefficients are compared with atol=5e-2; the intercept (only when
    fit_intercept is on) with delta=1e-2.

    Parameters
    ----------
    X, y : training data (y may be 1-D or 2-D)
    sample_weight : per-sample weights forwarded to fit, or None
    expected_coefs : expected coefficient vector(s)
    expected_intercept : expected intercept(s); ignored when fit_intercept is False
    params : optional dict of estimator parameters passed to set_params

    Returns
    -------
    The fitted coef_ attribute.
    """
    # Fix: `params={}` was a mutable default argument (shared across calls);
    # use None as the sentinel and create a fresh dict per call.
    params = {} if params is None else params
    # 2-D targets need the multi-output wrapper; 1-D targets the plain lasso.
    debiased_lasso = MultiOutputDebiasedLasso() if np.ndim(y) > 1 else DebiasedLasso()
    debiased_lasso.set_params(**params)
    debiased_lasso.fit(X, y, sample_weight)
    all_params = debiased_lasso.get_params()
    # Check coefficients and intercept are the same within tolerance
    if np.ndim(y) > 1:
        for i in range(y.shape[1]):
            np.testing.assert_allclose(debiased_lasso.coef_[i], expected_coefs[i], atol=5e-2)
            if all_params["fit_intercept"]:
                self.assertAlmostEqual(debiased_lasso.intercept_[i], expected_intercept[i], delta=1e-2)
    else:
        np.testing.assert_allclose(debiased_lasso.coef_, expected_coefs, atol=5e-2)
        if all_params["fit_intercept"]:
            self.assertAlmostEqual(debiased_lasso.intercept_, expected_intercept, delta=1e-2)
    return debiased_lasso.coef_
def _check_debiased_CI(self, X, y, sample_weight, expected_coefs, expected_intercept=0, n_experiments=200, params=None):
    """Check DebiasedLasso 90% prediction-interval coverage over repeated draws.

    Runs n_experiments simulations with fresh Gaussian covariates and noise
    (seeded by the experiment index for reproducibility), records per-dimension
    coverage of the true means at the unit vectors, and asserts the empirical
    coverage lies in [0.85, 0.95] for an alpha=0.1 interval.

    Parameters
    ----------
    X : array used only for its shape when drawing simulated covariates
    y : unused here (kept for signature parity with _check_debiased_coefs)
    sample_weight : per-sample weights forwarded to fit, or None
    expected_coefs, expected_intercept : true model used to simulate targets
    n_experiments : number of Monte Carlo repetitions
    params : optional dict of estimator parameters passed to set_params
    """
    # Fix: `params={}` was a mutable default argument (shared across calls);
    # use None as the sentinel and create a fresh dict per call.
    params = {} if params is None else params
    # Unit vectors
    X_test = np.eye(TestLassoExtensions.n_dim)
    y_test_mean = expected_intercept + expected_coefs
    is_in_interval = np.zeros((n_experiments, TestLassoExtensions.n_dim))
    for i in range(n_experiments):
        np.random.seed(i)
        X_exp = np.random.normal(size=X.shape)
        err = np.random.normal(scale=TestLassoExtensions.error_sd, size=X.shape[0])
        y_exp = expected_intercept + np.dot(X_exp, expected_coefs) + err
        debiased_lasso = DebiasedLasso()
        debiased_lasso.set_params(**params)
        debiased_lasso.fit(X_exp, y_exp, sample_weight)
        y_lower, y_upper = debiased_lasso.predict_interval(X_test, alpha=0.1)
        is_in_interval[i] = ((y_test_mean >= y_lower) & (y_test_mean <= y_upper))
    CI_coverage = np.mean(is_in_interval, axis=0)
    self.assertTrue(all(CI_coverage >= 0.85))
    self.assertTrue(all(CI_coverage <= 0.95))
def test_multi_output_debiased_lasso(self):
    """Test MultiOutputDebiasedLasso."""
    # The multi-output wrapper must expose every parameter of the
    # single-output DebiasedLasso.
    wrapper_params = MultiOutputDebiasedLasso().get_params()
    for name in DebiasedLasso().get_params():
        self.assertTrue(name in wrapper_params)

    # --- MultiOutputDebiasedLasso without weights ---
    # Debiased coefficients, no intercept.
    no_intercept = {'fit_intercept': False}
    self._check_debiased_coefs(TestLassoExtensions.X,
                               TestLassoExtensions.y_2D_consistent,
                               sample_weight=None,
                               expected_coefs=[TestLassoExtensions.coefs1,
                                               TestLassoExtensions.coefs2],
                               params=no_intercept)
    # Debiased coefficients with intercept.
    intercept_2D = np.array([TestLassoExtensions.intercept1, TestLassoExtensions.intercept2])
    self._check_debiased_coefs(TestLassoExtensions.X,
                               TestLassoExtensions.y_2D_consistent + intercept_2D,
                               sample_weight=None,
                               expected_coefs=[TestLassoExtensions.coefs1,
                                               TestLassoExtensions.coefs2],
                               expected_intercept=intercept_2D)
    # Confidence-interval coverage.
    self._check_debiased_CI_2D(TestLassoExtensions.X,
                               TestLassoExtensions.y_2D_consistent + intercept_2D,
                               sample_weight=None,
                               expected_coefs=np.array([TestLassoExtensions.coefs1,
                                                        TestLassoExtensions.coefs2]),
                               expected_intercept=intercept_2D)

    # --- MultiOutputDebiasedLasso with weights ---
    # Weighting the second half of the sample by 2 must be equivalent to
    # simply duplicating those rows.
    half = TestLassoExtensions.n_samples // 2
    sample_weight = np.concatenate((np.ones(half), np.ones(half) * 2))
    X_expanded = np.concatenate(
        (TestLassoExtensions.X, TestLassoExtensions.X[half:]))
    y_expanded = np.concatenate(
        (TestLassoExtensions.y_2D_consistent, TestLassoExtensions.y_2D_consistent[half:]))
    weighted_coefs = self._check_debiased_coefs(
        TestLassoExtensions.X,
        TestLassoExtensions.y_2D_consistent,
        sample_weight=sample_weight,
        expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
        params=no_intercept)
    expanded_coefs = self._check_debiased_coefs(
        X_expanded,
        y_expanded,
        sample_weight=None,
        expected_coefs=[TestLassoExtensions.coefs1, TestLassoExtensions.coefs2],
        params=no_intercept)
    for i in range(2):
        self.assertTrue(np.allclose(weighted_coefs[i], expanded_coefs[i]))
def test_auto_inference(self):
    """Exercise the automatically-selected inference APIs of several estimators."""
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W

    # DRLearner with a statsmodels final model: all inference calls succeed.
    dr = DRLearner(model_regression=LinearRegression(),
                   model_propensity=LogisticRegression(),
                   model_final=StatsModelsLinearRegression())
    dr.fit(Y, T, X=X, W=W)
    dr.effect_inference(X).summary_frame()
    dr.effect_inference(X).population_summary()
    dr.const_marginal_effect_inference(X).summary_frame()
    dr.marginal_effect_inference(T, X).summary_frame()

    # With a plain sklearn final model, effect inference is unavailable.
    dr_plain = DRLearner(model_regression=LinearRegression(),
                         model_propensity=LogisticRegression(),
                         model_final=LinearRegression(),
                         multitask_model_final=True)
    dr_plain.fit(Y, T, X=X, W=W)
    with pytest.raises(AttributeError):
        dr_plain.effect_inference(X)

    # DML with a statsmodels final model: coefficients, intercept and effects
    # all carry standard errors.
    dml_est = DML(model_y=LinearRegression(),
                  model_t=LinearRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  random_state=123)
    dml_est.fit(Y, T, X=X, W=W)
    dml_est.summary()
    dml_est.coef__inference().summary_frame()
    assert dml_est.coef__inference().stderr is not None
    dml_est.intercept__inference().summary_frame()
    assert dml_est.intercept__inference().stderr is not None
    dml_est.effect_inference(X).summary_frame()
    assert dml_est.effect_inference(X).stderr is not None
    dml_est.effect_inference(X).population_summary()
    dml_est.const_marginal_effect_inference(X).summary_frame()
    assert dml_est.const_marginal_effect_inference(X).stderr is not None
    dml_est.marginal_effect_inference(T, X).summary_frame()
    assert dml_est.marginal_effect_inference(T, X).stderr is not None

    # NonParamDML with a debiased-lasso final model also provides stderr.
    npdml = NonParamDML(model_y=LinearRegression(),
                        model_t=LinearRegression(),
                        model_final=DebiasedLasso(),
                        random_state=123)
    npdml.fit(Y, T, X=X, W=W)
    npdml.effect_inference(X).summary_frame()
    assert npdml.effect_inference(X).stderr is not None
    npdml.effect_inference(X).population_summary()
    npdml.const_marginal_effect_inference(X).summary_frame()
    assert npdml.const_marginal_effect_inference(X).stderr is not None
    npdml.marginal_effect_inference(T, X).summary_frame()
    assert npdml.marginal_effect_inference(T, X).stderr is not None
def __init__(self,
             model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
             model_regression=WeightedLassoCV(cv=3),
             featurizer=None,
             fit_cate_intercept=True,
             alpha='auto',
             max_iter=1000,
             tol=1e-4,
             n_splits=2,
             random_state=None):
    # The final (CATE) model is a DebiasedLasso configured from the lasso
    # hyperparameters; fit_cate_intercept maps onto its fit_intercept.
    super().__init__(model_propensity=model_propensity,
                     model_regression=model_regression,
                     model_final=DebiasedLasso(alpha=alpha,
                                               fit_intercept=fit_cate_intercept,
                                               max_iter=max_iter,
                                               tol=tol),
                     featurizer=featurizer,
                     multitask_model_final=False,
                     n_splits=n_splits,
                     random_state=random_state)
def test_dml(self):
    """Test setting attributes and refitting"""
    y, T, X, W = self._get_data()
    est = DML(model_y=LinearRegression(),
              model_t=LinearRegression(),
              model_final=StatsModelsLinearRegression(fit_intercept=False),
              linear_first_stages=False,
              random_state=123)
    est.fit(y, T, X=X, W=W)
    # refit_final is only allowed after a fit with cache_values=True.
    with pytest.raises(Exception):
        est.refit_final()
    est.fit(y, T, X=X, W=W, cache_values=True)

    # Swap in a robust linear final model and refit without redoing stage 1.
    est.model_final = StatsModelsRLM(fit_intercept=False)
    est.refit_final()
    assert isinstance(est.model_cate, StatsModelsRLM)
    # NOTE(review): model_cate.coef_[0] appears to play the role of the
    # intercept here, so comparisons skip it — confirm against DML internals.
    np.testing.assert_array_equal(est.model_cate.coef_[1:].flatten(), est.coef_.flatten())
    lb, ub = est.model_cate.coef__interval(alpha=0.01)
    lb_t, ub_t = est.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb[1:].flatten(), lb_t.flatten())
    np.testing.assert_array_equal(ub[1:].flatten(), ub_t.flatten())

    # Changing fit_cate_intercept alone takes effect only after refit_final.
    intercept_before = est.intercept_
    est.fit_cate_intercept = False
    np.testing.assert_equal(est.intercept_, intercept_before)
    est.refit_final()
    np.testing.assert_array_equal(est.model_cate.coef_.flatten(), est.coef_.flatten())
    lb, ub = est.model_cate.coef__interval(alpha=0.01)
    lb_t, ub_t = est.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb.flatten(), lb_t.flatten())
    np.testing.assert_array_equal(ub.flatten(), ub_t.flatten())
    # With no cate intercept the intercept accessors must raise.
    with pytest.raises(AttributeError):
        est.intercept_
    with pytest.raises(AttributeError):
        est.intercept__interval()

    est.model_final = DebiasedLasso(fit_intercept=False)
    est.refit_final()
    assert isinstance(est.model_cate, DebiasedLasso)

    # Setting a featurizer updates featurizer_ on refit.
    est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    est.model_final = StatsModelsLinearRegression(fit_intercept=False)
    est.refit_final()
    assert isinstance(est.featurizer_, PolynomialFeatures)
    est.fit_cate_intercept = True
    est.refit_final()
    # With a cate intercept the fitted featurizer becomes a Pipeline.
    assert isinstance(est.featurizer_, Pipeline)
    np.testing.assert_array_equal(est.coef_.shape, (X.shape[1]**2))
    np.testing.assert_array_equal(est.coef__interval()[0].shape, (X.shape[1]**2))

    # Refitting on identical data reproduces identical results.
    coef_before = est.coef_
    coef_interval_before = est.coef__interval()
    est.fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(coef_before, est.coef_)
    np.testing.assert_array_equal(coef_interval_before[0], est.coef__interval()[0])

    # Mutating settings then fitting matches a fresh estimator built the same way.
    est.discrete_treatment = True
    est.featurizer = None
    est.linear_first_stages = True
    est.model_t = LogisticRegression()
    est.fit(y, T, X=X, W=W)
    replica = DML(model_y=LinearRegression(),
                  model_t=LogisticRegression(),
                  model_final=StatsModelsLinearRegression(fit_intercept=False),
                  discrete_treatment=True,
                  linear_first_stages=True,
                  random_state=123).fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(est.coef_, replica.coef_)
    np.testing.assert_array_equal(est.coef__interval()[0], replica.coef__interval()[0])

    # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
    lin = LinearDML(model_y=LinearRegression(), model_t=LinearRegression(),
                    linear_first_stages=False)
    lin.fit(y, T, X=X, W=W, cache_values=True)
    with pytest.raises(ValueError):
        lin.model_final = StatsModelsRLM()

    # can set final model for plain DML, but can't for LinearDML (hardcoded to StatsModelsRegression)
    sparse = SparseLinearDML(model_y=LinearRegression(), model_t=LinearRegression(),
                             linear_first_stages=False)
    sparse.fit(y, T, X=X, W=W, cache_values=True)
    with pytest.raises(ValueError):
        sparse.model_final = StatsModelsRLM()
    # Hyperparameters of the hardcoded final model remain tunable.
    sparse.alpha = 0.01
    sparse.max_iter = 10
    sparse.tol = 0.01
    sparse.refit_final()
    np.testing.assert_equal(sparse.model_cate.estimators_[0].alpha, 0.01)
    np.testing.assert_equal(sparse.model_cate.estimators_[0].max_iter, 10)
    np.testing.assert_equal(sparse.model_cate.estimators_[0].tol, 0.01)