def test_doubleml_exception_confint():
    """Validate the input checks and the fit/bootstrap preconditions of confint()."""
    dml_obj = DoubleMLPLR(dml_data, ml_g, ml_m)

    expected = 'joint must be True or False. Got 1.'
    with pytest.raises(TypeError, match=expected):
        dml_obj.confint(joint=1)

    expected = "The confidence level must be of float type. 5% of type <class 'str'> was passed."
    with pytest.raises(TypeError, match=expected):
        dml_obj.confint(level='5%')

    expected = r'The confidence level must be in \(0,1\). 0.0 was passed.'
    with pytest.raises(ValueError, match=expected):
        dml_obj.confint(level=0.)

    expected = r'Apply fit\(\) before confint\(\).'
    with pytest.raises(ValueError, match=expected):
        dml_obj.confint()

    expected = r'Apply fit\(\) & bootstrap\(\) before confint\(joint=True\).'
    with pytest.raises(ValueError, match=expected):
        dml_obj.confint(joint=True)

    dml_obj.fit()
    # error message should still appear till bootstrap was applied as well
    with pytest.raises(ValueError, match=expected):
        dml_obj.confint(joint=True)

    dml_obj.bootstrap()
    ci = dml_obj.confint(joint=True)
    assert isinstance(ci, pd.DataFrame)
def test_add_vars_in_df():
    """Extra, unused columns in the DataFrame must not change the PLR estimates."""
    np.random.seed(3141)
    df = make_plr_CCDDHNR2018(n_obs=100, return_type='DataFrame')

    x_cols = ['X1', 'X11', 'X13']
    data_full = DoubleMLData(df, 'y', 'd', x_cols)
    data_subset = DoubleMLData(df[x_cols + ['y', 'd']], 'y', 'd', x_cols)

    plr_full = DoubleMLPLR(data_full, Lasso(), Lasso())
    plr_subset = DoubleMLPLR(data_subset, Lasso(), Lasso(), draw_sample_splitting=False)
    # reuse the exact same folds so the two fits are directly comparable
    plr_subset.set_sample_splitting(plr_full.smpls)

    plr_full.fit()
    plr_subset.fit()
    assert np.allclose(plr_full.coef, plr_subset.coef, rtol=1e-9, atol=1e-4)
    assert np.allclose(plr_full.se, plr_subset.se, rtol=1e-9, atol=1e-4)
def test_doubleml_exception_learner():
    """Check errors/warnings raised for invalid or mismatched ml learners."""
    err_msg_prefix = 'Invalid learner provided for ml_g: '
    warn_msg_prefix = 'Learner provided for ml_g is probably invalid: '

    # passing the class itself instead of an instance
    msg = err_msg_prefix + 'provide an instance of a learner instead of a class.'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(dml_data, Lasso, ml_m)
    # learner without a fit() method
    msg = err_msg_prefix + r'BaseEstimator\(\) has no method .fit\(\).'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(dml_data, BaseEstimator(), ml_m)
    # learners missing set_params()/get_params(); expected messages kept as comments,
    # only the exception type is asserted here
    # msg = err_msg_prefix + r'_DummyNoSetParams\(\) has no method .set_params\(\).'
    with pytest.raises(TypeError):
        _ = DoubleMLPLR(dml_data, _DummyNoSetParams(), ml_m)
    # msg = err_msg_prefix + r'_DummyNoSetParams\(\) has no method .get_params\(\).'
    with pytest.raises(TypeError):
        _ = DoubleMLPLR(dml_data, _DummyNoGetParams(), ml_m)
    # msg = 'Learner provided for ml_m is probably invalid: ' + r'_DummyNoClassifier\(\) is \(probably\) no classifier.'
    with pytest.warns(UserWarning):
        _ = DoubleMLIRM(dml_data_irm, Lasso(), _DummyNoClassifier())

    # ToDo: Currently for ml_g (and others) we only check whether the learner can be identified as regressor. However,
    # we do not check whether it can instead be identified as classifier, which could be used to throw an error.
    msg = warn_msg_prefix + r'LogisticRegression\(\) is \(probably\) no regressor.'
    with pytest.warns(UserWarning, match=msg):
        _ = DoubleMLPLR(dml_data, LogisticRegression(), Lasso())

    # we allow classifiers for ml_m in PLR, but only for binary treatment variables
    msg = (
        r'The ml_m learner LogisticRegression\(\) was identified as classifier '
        'but at least one treatment variable is not binary with values 0 and 1.'
    )
    with pytest.raises(ValueError, match=msg):
        _ = DoubleMLPLR(dml_data, Lasso(), LogisticRegression())

    # construct a classifier which is not identifiable as classifier via is_classifier by sklearn
    # it then predicts labels and therefore an exception will be thrown
    log_reg = LogisticRegression()
    log_reg._estimator_type = None
    msg = (
        r'Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) neither a regressor '
        'nor a classifier. Method predict is used for prediction.')
    with pytest.warns(UserWarning, match=msg):
        dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg)
    # the hidden classifier predicts binary labels, which the fit-time check rejects
    msg = (
        r'For the binary treatment variable d, predictions obtained with the ml_m learner LogisticRegression\(\) '
        'are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not '
        'labels are predicted.')
    with pytest.raises(ValueError, match=msg):
        dml_plr_hidden_classifier.fit()
def test_doubleml_exception_bootstrap():
    """Input validation and the fit-precondition check of bootstrap()."""
    boot_obj = DoubleMLPLR(dml_data, ml_g, ml_m)

    expected = r'Apply fit\(\) before bootstrap\(\).'
    with pytest.raises(ValueError, match=expected):
        boot_obj.bootstrap()

    boot_obj.fit()

    expected = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.'
    with pytest.raises(ValueError, match=expected):
        boot_obj.bootstrap(method='Gaussian')

    expected = "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed."
    with pytest.raises(TypeError, match=expected):
        boot_obj.bootstrap(n_rep_boot='500')

    expected = 'The number of bootstrap replications must be positive. 0 was passed.'
    with pytest.raises(ValueError, match=expected):
        boot_obj.bootstrap(n_rep_boot=0)
def test_doubleml_exception_p_adjust():
    """Validation and fit/bootstrap precondition checks of p_adjust()."""
    dml_obj = DoubleMLPLR(dml_data, ml_g, ml_m)

    expected = r'Apply fit\(\) before p_adjust\(\).'
    with pytest.raises(ValueError, match=expected):
        dml_obj.p_adjust()

    dml_obj.fit()

    # romano-wolf additionally requires bootstrap results
    expected = r'Apply fit\(\) & bootstrap\(\) before p_adjust'
    with pytest.raises(ValueError, match=expected):
        dml_obj.p_adjust(method='romano-wolf')

    dml_obj.bootstrap()
    p_val = dml_obj.p_adjust(method='romano-wolf')
    assert isinstance(p_val, pd.DataFrame)

    expected = "The p_adjust method must be of str type. 0.05 of type <class 'float'> was passed."
    with pytest.raises(TypeError, match=expected):
        dml_obj.p_adjust(method=0.05)
# assert isinstance(dml_obj.tune(), cls) n_treat = 1 n_rep = 2 n_folds = 3 n_obs = 100 n_rep_boot = 314 plr_dml1 = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), dml_procedure='dml1', n_rep=n_rep, n_folds=n_folds) plr_dml1.fit() plr_dml1.bootstrap(n_rep_boot=n_rep_boot) pliv_dml1 = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), dml_procedure='dml1', n_rep=n_rep, n_folds=n_folds) pliv_dml1.fit() pliv_dml1.bootstrap(n_rep_boot=n_rep_boot) irm_dml1 = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# fix the seed so the generated datasets (and hence the fitted fixtures) are reproducible
np.random.seed(3141)

# small synthetic datasets, one per model class
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# pre-fitted model objects shared by the tests in this module
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_irm.fit()
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression())
dml_iivm.fit()

# fit models with callable scores
# reuse the internal score-elements function of the fitted PLR model as a callable score
plr_score = dml_plr._score_elements
dml_plr_callable_score = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), score=plr_score, draw_sample_splitting=False)