def test_doubleml_exception_confint():
    dml_plr_confint = DoubleMLPLR(dml_data, ml_g, ml_m)

    msg = 'joint must be True or False. Got 1.'
    with pytest.raises(TypeError, match=msg):
        dml_plr_confint.confint(joint=1)
    msg = "The confidence level must be of float type. 5% of type <class 'str'> was passed."
    with pytest.raises(TypeError, match=msg):
        dml_plr_confint.confint(level='5%')
    msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
    with pytest.raises(ValueError, match=msg):
        dml_plr_confint.confint(level=0.)

    msg = r'Apply fit\(\) before confint\(\).'
    with pytest.raises(ValueError, match=msg):
        dml_plr_confint.confint()
    msg = r'Apply fit\(\) & bootstrap\(\) before confint\(joint=True\).'
    with pytest.raises(ValueError, match=msg):
        dml_plr_confint.confint(joint=True)
    dml_plr_confint.fit()  # error message should still appear till bootstrap was applied as well
    with pytest.raises(ValueError, match=msg):
        dml_plr_confint.confint(joint=True)
    dml_plr_confint.bootstrap()
    df_ci = dml_plr_confint.confint(joint=True)
    assert isinstance(df_ci, pd.DataFrame)
Esempio n. 2
0
def test_add_vars_in_df():
    # additional variables in the df shouldn't affect results
    np.random.seed(3141)
    df = make_plr_CCDDHNR2018(n_obs=100, return_type='DataFrame')
    dml_data_full_df = DoubleMLData(df, 'y', 'd', ['X1', 'X11', 'X13'])
    dml_data_subset = DoubleMLData(df[['X1', 'X11', 'X13'] + ['y', 'd']], 'y', 'd', ['X1', 'X11', 'X13'])
    dml_plr_full_df = DoubleMLPLR(dml_data_full_df, Lasso(), Lasso())
    dml_plr_subset = DoubleMLPLR(dml_data_subset, Lasso(), Lasso(), draw_sample_splitting=False)
    dml_plr_subset.set_sample_splitting(dml_plr_full_df.smpls)
    dml_plr_full_df.fit()
    dml_plr_subset.fit()
    assert np.allclose(dml_plr_full_df.coef, dml_plr_subset.coef, rtol=1e-9, atol=1e-4)
    assert np.allclose(dml_plr_full_df.se, dml_plr_subset.se, rtol=1e-9, atol=1e-4)
def test_doubleml_exception_learner():
    err_msg_prefix = 'Invalid learner provided for ml_g: '
    warn_msg_prefix = 'Learner provided for ml_g is probably invalid: '

    msg = err_msg_prefix + 'provide an instance of a learner instead of a class.'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(dml_data, Lasso, ml_m)
    msg = err_msg_prefix + r'BaseEstimator\(\) has no method .fit\(\).'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(dml_data, BaseEstimator(), ml_m)
    # msg = err_msg_prefix + r'_DummyNoSetParams\(\) has no method .set_params\(\).'
    with pytest.raises(TypeError):
        _ = DoubleMLPLR(dml_data, _DummyNoSetParams(), ml_m)
    # msg = err_msg_prefix + r'_DummyNoSetParams\(\) has no method .get_params\(\).'
    with pytest.raises(TypeError):
        _ = DoubleMLPLR(dml_data, _DummyNoGetParams(), ml_m)

    # msg = 'Learner provided for ml_m is probably invalid: ' + r'_DummyNoClassifier\(\) is \(probably\) no classifier.'
    with pytest.warns(UserWarning):
        _ = DoubleMLIRM(dml_data_irm, Lasso(), _DummyNoClassifier())

    # ToDo: Currently for ml_g (and others) we only check whether the learner can be identified as regressor. However,
    # we do not check whether it can instead be identified as classifier, which could be used to throw an error.
    msg = warn_msg_prefix + r'LogisticRegression\(\) is \(probably\) no regressor.'
    with pytest.warns(UserWarning, match=msg):
        _ = DoubleMLPLR(dml_data, LogisticRegression(), Lasso())

    # we allow classifiers for ml_m in PLR, but only for binary treatment variables
    msg = (
        r'The ml_m learner LogisticRegression\(\) was identified as classifier '
        'but at least one treatment variable is not binary with values 0 and 1.'
    )
    with pytest.raises(ValueError, match=msg):
        _ = DoubleMLPLR(dml_data, Lasso(), LogisticRegression())

    # construct a classifier which is not identifiable as classifier via is_classifier by sklearn
    # it then predicts labels and therefore an exception will be thrown
    log_reg = LogisticRegression()
    log_reg._estimator_type = None
    msg = (
        r'Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) neither a regressor '
        'nor a classifier. Method predict is used for prediction.')
    with pytest.warns(UserWarning, match=msg):
        dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg)
    msg = (
        r'For the binary treatment variable d, predictions obtained with the ml_m learner LogisticRegression\(\) '
        'are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not '
        'labels are predicted.')
    with pytest.raises(ValueError, match=msg):
        dml_plr_hidden_classifier.fit()
def test_doubleml_exception_bootstrap():
    dml_plr_boot = DoubleMLPLR(dml_data, ml_g, ml_m)
    msg = r'Apply fit\(\) before bootstrap\(\).'
    with pytest.raises(ValueError, match=msg):
        dml_plr_boot.bootstrap()

    dml_plr_boot.fit()
    msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.'
    with pytest.raises(ValueError, match=msg):
        dml_plr_boot.bootstrap(method='Gaussian')
    msg = "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed."
    with pytest.raises(TypeError, match=msg):
        dml_plr_boot.bootstrap(n_rep_boot='500')
    msg = 'The number of bootstrap replications must be positive. 0 was passed.'
    with pytest.raises(ValueError, match=msg):
        dml_plr_boot.bootstrap(n_rep_boot=0)
def test_doubleml_exception_p_adjust():
    dml_plr_p_adjust = DoubleMLPLR(dml_data, ml_g, ml_m)

    msg = r'Apply fit\(\) before p_adjust\(\).'
    with pytest.raises(ValueError, match=msg):
        dml_plr_p_adjust.p_adjust()
    dml_plr_p_adjust.fit()
    msg = r'Apply fit\(\) & bootstrap\(\) before p_adjust'
    with pytest.raises(ValueError, match=msg):
        dml_plr_p_adjust.p_adjust(method='romano-wolf')
    dml_plr_p_adjust.bootstrap()
    p_val = dml_plr_p_adjust.p_adjust(method='romano-wolf')
    assert isinstance(p_val, pd.DataFrame)

    msg = "The p_adjust method must be of str type. 0.05 of type <class 'float'> was passed."
    with pytest.raises(TypeError, match=msg):
        dml_plr_p_adjust.p_adjust(method=0.05)
Esempio n. 6
0
    # assert isinstance(dml_obj.tune(), cls)


n_treat = 1
n_rep = 2
n_folds = 3
n_obs = 100
n_rep_boot = 314

plr_dml1 = DoubleMLPLR(dml_data_plr,
                       Lasso(),
                       Lasso(),
                       dml_procedure='dml1',
                       n_rep=n_rep,
                       n_folds=n_folds)
plr_dml1.fit()
plr_dml1.bootstrap(n_rep_boot=n_rep_boot)

pliv_dml1 = DoubleMLPLIV(dml_data_pliv,
                         Lasso(),
                         Lasso(),
                         Lasso(),
                         dml_procedure='dml1',
                         n_rep=n_rep,
                         n_folds=n_folds)
pliv_dml1.fit()
pliv_dml1.bootstrap(n_rep_boot=n_rep_boot)

irm_dml1 = DoubleMLIRM(dml_data_irm,
                       Lasso(),
                       LogisticRegression(),
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

np.random.seed(3141)
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_irm.fit()
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())
dml_iivm.fit()

# fit models with callable scores
plr_score = dml_plr._score_elements
dml_plr_callable_score = DoubleMLPLR(dml_data_plr,
                                     Lasso(),
                                     Lasso(),
                                     score=plr_score,
                                     draw_sample_splitting=False)