def test_pickle_inferenceresult(self):
    """Check that an effect-inference result can be serialized with pickle.

    Fits a ``DML`` estimator on the shared ``TestInference`` data, requests
    effect inference at ``X`` and hands the result to ``pickle.dumps``.
    There is no explicit assertion: the test passes as long as none of
    these steps raises.
    """
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
    est = DML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        model_final=Lasso(alpha=0.1, fit_intercept=False),
        featurizer=PolynomialFeatures(degree=1, include_bias=False),
        random_state=123,
    )
    est.fit(Y, T, X=X, W=W)
    effect_inf = est.effect_inference(X)
    # Only successful serialization matters here, so the resulting bytes are
    # not kept in a (previously unused) local variable.
    pickle.dumps(effect_inf)
def test_rlearner_residuals(self):
    """Check when ``residuals_`` is available on ``DML`` and what it holds.

    Expectations encoded by this test:

    * reading ``residuals_`` raises ``AttributeError`` before fitting and
      after a fit that did not pass ``cache_values=True``;
    * after fitting with ``cache_values=True`` it unpacks into four arrays:
      outcome residuals, treatment residuals, ``X`` and ``W``;
    * with ``cv=1`` and linear first stages, the residuals equal the
      in-sample residuals of a ``LinearRegression`` on ``[X, W]``.
    """
    y, T, X, W = self._get_data()

    est = DML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        cv=1,
        model_final=StatsModelsLinearRegression(fit_intercept=False),
        linear_first_stages=False,
        random_state=123,
    )

    # Not fitted yet: the attribute access itself is expected to fail.
    with pytest.raises(AttributeError):
        est.residuals_

    # Fitted, but without caching: still expected to fail.
    est.fit(y, T, X=X, W=W)
    with pytest.raises(AttributeError):
        est.residuals_

    # Fitted with caching: residuals can now be read.
    est.fit(y, T, X=X, W=W, cache_values=True)
    res_y, res_t, res_x, res_w = est.residuals_

    # The features and controls are handed back unchanged.
    np.testing.assert_array_equal(X, res_x)
    np.testing.assert_array_equal(W, res_w)

    # Re-derive the first-stage residuals by hand and compare exactly.
    controls = np.hstack([X, W])
    expected_y = y - LinearRegression().fit(controls, y).predict(controls)
    np.testing.assert_array_equal(res_y, expected_y)
    expected_t = T - LinearRegression().fit(controls, T).predict(controls)
    np.testing.assert_array_equal(res_t, expected_t)
def test_inference_with_none_stderr(self):
    """Smoke-test the inference/summary API on three fitted estimators.

    A ``DML`` (Lasso final model), a ``NonParamDML`` and a ``DRLearner`` are
    each fitted on the shared ``TestInference`` data, and their inference
    result objects are asked for summary frames / population summaries.
    Nothing is asserted on the returned values; the test passes when no call
    raises.

    NOTE(review): judging by the test name, these configurations are
    presumably ones where standard errors are unavailable (``None``) --
    that is not visible from this block; confirm against the estimators.
    """
    Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W

    def summarize_effects(fitted):
        # The four inference calls every estimator below is put through,
        # in the same order for each estimator.
        fitted.effect_inference(X).summary_frame()
        fitted.effect_inference(X).population_summary()
        fitted.const_marginal_effect_inference(X).summary_frame()
        fitted.marginal_effect_inference(T, X).summary_frame()

    # Parametric DML with a Lasso final stage: also exercise the
    # coefficient- and intercept-level summaries.
    parametric = DML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        model_final=Lasso(alpha=0.1, fit_intercept=False),
        featurizer=PolynomialFeatures(degree=1, include_bias=False),
        random_state=123,
    )
    parametric.fit(Y, T, X=X, W=W)
    parametric.summary()
    parametric.coef__inference().summary_frame()
    parametric.intercept__inference().summary_frame()
    summarize_effects(parametric)

    # Non-parametric DML.
    nonparametric = NonParamDML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        model_final=LinearRegression(fit_intercept=False),
        featurizer=PolynomialFeatures(degree=1, include_bias=False),
        random_state=123,
    )
    nonparametric.fit(Y, T, X=X, W=W)
    summarize_effects(nonparametric)

    # Doubly robust learner.
    doubly_robust = DRLearner(
        model_regression=LinearRegression(),
        model_propensity=LogisticRegression(),
        model_final=LinearRegression(),
    )
    doubly_robust.fit(Y, T, X=X, W=W)
    summarize_effects(doubly_robust)
def test_dml(self):
    """Test setting attributes and refitting.

    Walks a ``DML`` estimator through a series of attribute changes
    (``model_final``, ``fit_cate_intercept``, ``featurizer``,
    ``discrete_treatment``, ``linear_first_stages``, ``model_t``) and checks
    after each ``refit_final()`` / ``fit()`` that the exposed coefficients
    and intervals agree with the underlying final model or with a freshly
    constructed estimator.  Finally checks that ``LinearDML`` and
    ``SparseLinearDML`` reject a new ``model_final`` and that
    ``SparseLinearDML`` passes ``alpha`` / ``max_iter`` / ``tol`` through to
    its final-stage estimators on refit.
    """
    y, T, X, W = self._get_data()

    dml = DML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        model_final=StatsModelsLinearRegression(fit_intercept=False),
        linear_first_stages=False,
        random_state=123,
    )

    # A fit without cache_values=True is expected to make refit_final() fail.
    dml.fit(y, T, X=X, W=W)
    with pytest.raises(Exception):
        dml.refit_final()

    # With cached values, swapping the final model and refitting works.
    dml.fit(y, T, X=X, W=W, cache_values=True)
    dml.model_final = StatsModelsRLM(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.model_cate, StatsModelsRLM)
    # The leading entry of the final model's coefficients is skipped here --
    # presumably the CATE intercept term, which dml.coef_ does not include.
    np.testing.assert_array_equal(dml.model_cate.coef_[1:].flatten(), dml.coef_.flatten())
    lb, ub = dml.model_cate.coef__interval(alpha=0.01)
    lbt, ubt = dml.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb[1:].flatten(), lbt.flatten())
    np.testing.assert_array_equal(ub[1:].flatten(), ubt.flatten())

    # Changing fit_cate_intercept must not affect results until a refit.
    intcpt = dml.intercept_
    dml.fit_cate_intercept = False
    np.testing.assert_equal(dml.intercept_, intcpt)
    dml.refit_final()
    # After the refit every final-model coefficient is compared (no skip).
    np.testing.assert_array_equal(dml.model_cate.coef_.flatten(), dml.coef_.flatten())
    lb, ub = dml.model_cate.coef__interval(alpha=0.01)
    lbt, ubt = dml.coef__interval(alpha=0.01)
    np.testing.assert_array_equal(lb.flatten(), lbt.flatten())
    np.testing.assert_array_equal(ub.flatten(), ubt.flatten())
    # ... and the intercept accessors are expected to be unavailable.
    with pytest.raises(AttributeError):
        dml.intercept_
    with pytest.raises(AttributeError):
        dml.intercept__interval()

    dml.model_final = DebiasedLasso(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.model_cate, DebiasedLasso)

    # Featurizer changes are picked up on refit; with fit_cate_intercept=True
    # the fitted featurizer is expected to be a Pipeline instead of the bare
    # PolynomialFeatures.
    dml.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    dml.model_final = StatsModelsLinearRegression(fit_intercept=False)
    dml.refit_final()
    assert isinstance(dml.featurizer_, PolynomialFeatures)
    dml.fit_cate_intercept = True
    dml.refit_final()
    assert isinstance(dml.featurizer_, Pipeline)
    # Compare against a real 1-tuple: a bare int would make
    # assert_array_equal do a scalar-vs-array comparison, which would also
    # accept shapes such as (k, k).
    expected_shape = (X.shape[1] ** 2,)
    np.testing.assert_array_equal(dml.coef_.shape, expected_shape)
    np.testing.assert_array_equal(dml.coef__interval()[0].shape, expected_shape)

    # A full fit with the same settings must reproduce the refit results.
    coefpre = dml.coef_
    coefpreint = dml.coef__interval()
    dml.fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(coefpre, dml.coef_)
    np.testing.assert_array_equal(coefpreint[0], dml.coef__interval()[0])

    # Reconfiguring via attributes and fitting must match an estimator that
    # was constructed with those settings from the start.
    dml.discrete_treatment = True
    dml.featurizer = None
    dml.linear_first_stages = True
    dml.model_t = LogisticRegression()
    dml.fit(y, T, X=X, W=W)
    newdml = DML(
        model_y=LinearRegression(),
        model_t=LogisticRegression(),
        model_final=StatsModelsLinearRegression(fit_intercept=False),
        discrete_treatment=True,
        linear_first_stages=True,
        random_state=123,
    ).fit(y, T, X=X, W=W)
    np.testing.assert_array_equal(dml.coef_, newdml.coef_)
    np.testing.assert_array_equal(dml.coef__interval()[0], newdml.coef__interval()[0])

    ldml = LinearDML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        linear_first_stages=False,
    )
    ldml.fit(y, T, X=X, W=W, cache_values=True)
    # can set final model for plain DML, but can't for LinearDML
    # (hardcoded to StatsModelsRegression)
    with pytest.raises(ValueError):
        ldml.model_final = StatsModelsRLM()

    ldml = SparseLinearDML(
        model_y=LinearRegression(),
        model_t=LinearRegression(),
        linear_first_stages=False,
    )
    ldml.fit(y, T, X=X, W=W, cache_values=True)
    # likewise, assigning a final model to SparseLinearDML is expected to be
    # rejected
    with pytest.raises(ValueError):
        ldml.model_final = StatsModelsRLM()
    # Its tuning attributes, however, can be changed and must reach the
    # final-stage estimators after a refit.
    ldml.alpha = 0.01
    ldml.max_iter = 10
    ldml.tol = 0.01
    ldml.refit_final()
    np.testing.assert_equal(ldml.model_cate.estimators_[0].alpha, 0.01)
    np.testing.assert_equal(ldml.model_cate.estimators_[0].max_iter, 10)
    np.testing.assert_equal(ldml.model_cate.estimators_[0].tol, 0.01)