def test_doubleml_exception_confint():
    """Check the argument and call-order errors raised by ``confint()``."""
    model = DoubleMLPLR(dml_data, ml_g, ml_m)

    # Invalid arguments; these calls are made on the model before fit() is called.
    invalid_calls = [
        (TypeError,
         'joint must be True or False. Got 1.',
         {'joint': 1}),
        (TypeError,
         "The confidence level must be of float type. 5% of type <class 'str'> was passed.",
         {'level': '5%'}),
        (ValueError,
         r'The confidence level must be in \(0,1\). 0.0 was passed.',
         {'level': 0.}),
    ]
    for exc_type, pattern, kwargs in invalid_calls:
        with pytest.raises(exc_type, match=pattern):
            model.confint(**kwargs)

    need_fit = r'Apply fit\(\) before confint\(\).'
    need_fit_and_boot = r'Apply fit\(\) & bootstrap\(\) before confint\(joint=True\).'

    with pytest.raises(ValueError, match=need_fit):
        model.confint()
    with pytest.raises(ValueError, match=need_fit_and_boot):
        model.confint(joint=True)

    # After fit() alone the joint interval must still be refused; only the
    # subsequent bootstrap() call makes confint(joint=True) succeed.
    model.fit()
    with pytest.raises(ValueError, match=need_fit_and_boot):
        model.confint(joint=True)

    model.bootstrap()
    joint_ci = model.confint(joint=True)
    assert isinstance(joint_ci, pd.DataFrame)
def test_doubleml_exception_no_cross_fit():
    """Constructing a PLR model with ``apply_cross_fitting=False`` must raise an AssertionError."""
    expected = 'Estimation without cross-fitting not supported for n_folds > 2.'
    with pytest.raises(AssertionError, match=expected):
        DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=False)
def test_doubleml_exception_bootstrap():
    """Check the call-order and argument errors raised by ``bootstrap()``."""
    model = DoubleMLPLR(dml_data, ml_g, ml_m)

    # bootstrap() is not allowed before fit() has been called.
    with pytest.raises(ValueError, match=r'Apply fit\(\) before bootstrap\(\).'):
        model.bootstrap()

    model.fit()

    # Invalid arguments, checked on the fitted model.
    invalid_calls = [
        (ValueError,
         'Method must be "Bayes", "normal" or "wild". Got Gaussian.',
         {'method': 'Gaussian'}),
        (TypeError,
         "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed.",
         {'n_rep_boot': '500'}),
        (ValueError,
         'The number of bootstrap replications must be positive. 0 was passed.',
         {'n_rep_boot': 0}),
    ]
    for exc_type, pattern, kwargs in invalid_calls:
        with pytest.raises(exc_type, match=pattern):
            model.bootstrap(**kwargs)
import pandas as pd
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\
    make_pliv_multiway_cluster_CKMS2021

from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.base import BaseEstimator

# Seed NumPy's global RNG before any data is generated
# (presumably so the simulated data sets below are reproducible -- confirm).
np.random.seed(3141)
# Small PLR data set (n_obs=10) and the module-level Lasso learner instances.
dml_data = make_plr_CCDDHNR2018(n_obs=10)
ml_g = Lasso()
ml_m = Lasso()
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m)

# Data sets for the other generators (IRM, IIVM, PLIV and multiway-cluster PLIV).
dml_data_irm = make_irm_data(n_obs=10)
dml_data_iivm = make_iivm_data(n_obs=10)
dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1)
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
# IIVM data as raw arrays so the outcome can be recoded in place below.
(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array")
# Recode the outcome: positive values become 1, negative values become 0
# (values exactly equal to 0 are left unchanged).
y[y > 0] = 1
y[y < 0] = 0
# The recoded outcome wrapped as DoubleMLData, without and with the array z.
dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d)
dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z)


@pytest.mark.ci
def test_doubleml_exception_data():
    """Passing something that is not DoubleMLData as data must raise a TypeError.

    The expected message was previously assigned but never checked, so the
    test passed without asserting anything.
    """
    msg = 'The data must be of DoubleMLData type.'
    # A plain (empty) DataFrame is not a DoubleMLData object.
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)
def test_doubleml_warning_crossfitting_onefold():
    """Requesting cross-fitting together with ``n_folds=1`` must emit a UserWarning."""
    expected = 'apply_cross_fitting is set to False. Cross-fitting is not supported for n_folds = 1.'
    with pytest.warns(UserWarning, match=expected):
        DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=True, n_folds=1)
def test_doubleml_draw_vs_set():
    """Compare drawn sample splits with the same splits set via ``set_sample_splitting()``.

    For each configuration a fresh model draws its own splits. Those splits are
    then passed to a second model, either as-is or unwrapped by one or two
    levels of ``[0]`` indexing, and the resampling parameters of both models
    are compared with ``_assert_resampling_pars``.
    """
    np.random.seed(3141)
    dml_plr_set = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=7, n_rep=8)

    # (n_folds, n_rep, apply_cross_fitting, number of nesting levels to try);
    # a level k passes ``smpls`` indexed k times with ``[0]``.
    configurations = [
        (1, 1, False, 3),
        (2, 1, False, 3),
        (2, 1, True, 2),
        (5, 1, True, 2),
        (5, 3, True, 1),
        (2, 4, False, 1),
    ]

    for n_folds, n_rep, cross_fit, n_levels in configurations:
        dml_plr_drawn = DoubleMLPLR(dml_data,
                                    ml_g,
                                    ml_m,
                                    n_folds=n_folds,
                                    n_rep=n_rep,
                                    apply_cross_fitting=cross_fit)
        for level in range(n_levels):
            external_smpls = dml_plr_drawn.smpls
            for _ in range(level):
                external_smpls = external_smpls[0]
            dml_plr_set.set_sample_splitting(external_smpls)
            _assert_resampling_pars(dml_plr_drawn, dml_plr_set)
# Example #7
# 0
import pytest
import pandas as pd
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before the data sets are generated
# (presumably for reproducible simulated data -- confirm).
np.random.seed(3141)
# One simulated data set (n_obs=100) per model type.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# One model object per class; fit() is not called on them in this block.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())


@pytest.mark.ci
@pytest.mark.parametrize(
    'dml_obj, cls',
    [
        (dml_plr, DoubleMLPLR),
        (dml_pliv, DoubleMLPLIV),
        (dml_irm, DoubleMLIRM),
        (dml_iivm, DoubleMLIIVM),
    ],
)
def test_plr_return_types(dml_obj, cls):
    """Check the types of the string representation and of ``summary`` for each model."""
    # ToDo: A second test case with multiple treatment variables would be helpful
    text = dml_obj.__str__()
    assert isinstance(text, str)

    summary = dml_obj.summary
    assert isinstance(summary, pd.DataFrame)
import pytest
import numpy as np

from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018

from sklearn.linear_model import Lasso

# Seed NumPy's global RNG before the data set is generated
# (presumably for reproducible simulated data -- confirm).
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=10)
ml_g = Lasso()
ml_m = Lasso()
# PLR model with 7 folds and 8 repetitions, created with
# draw_sample_splitting=False.
dml_plr = DoubleMLPLR(dml_data,
                      ml_g,
                      ml_m,
                      n_folds=7,
                      n_rep=8,
                      draw_sample_splitting=False)


def _assert_resampling_pars(dml_obj0, dml_obj1):
    """Assert that two model objects agree on their resampling settings and samples.

    Compares ``n_folds``, ``n_rep`` and ``apply_cross_fitting`` for equality and
    then delegates the comparison of ``smpls`` to ``_assert_smpls_equal``.
    """
    for attr_name in ('n_folds', 'n_rep', 'apply_cross_fitting'):
        assert getattr(dml_obj0, attr_name) == getattr(dml_obj1, attr_name)

    cross_fit = dml_obj0.apply_cross_fitting
    _assert_smpls_equal(dml_obj0.smpls, dml_obj1.smpls, cross_fit)


def _assert_smpls_equal(smpls0, smpls1, apply_cross_fitting=True):
    assert len(smpls0) == len(smpls1)
    for i_rep, _ in enumerate(smpls0):
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before the data sets are generated
# (presumably for reproducible simulated data -- confirm).
np.random.seed(3141)
# One simulated data set (n_obs=100) per model type.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Models built without any resampling arguments, so the tests below can
# inspect the default settings.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())


def _assert_resampling_default_settings(dml_obj):
    """Assert that ``dml_obj`` carries the expected default resampling settings.

    Expects ``n_folds == 5`` and ``n_rep == 1``, and requires both
    ``draw_sample_splitting`` and ``apply_cross_fitting`` to be truthy.
    """
    for attr_name, expected in (('n_folds', 5), ('n_rep', 1)):
        assert getattr(dml_obj, attr_name) == expected
    for flag_name in ('draw_sample_splitting', 'apply_cross_fitting'):
        assert getattr(dml_obj, flag_name)


@pytest.mark.ci
def test_plr_defaults():
    """The module-level PLR model must expose the default resampling settings."""
    _assert_resampling_default_settings(dml_obj=dml_plr)
# Example #10
# 0
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before the data sets are generated
# (presumably for reproducible simulated data -- confirm).
np.random.seed(3141)
# One simulated data set (n_obs=100) per model type.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Build each model and fit it immediately.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_irm.fit()
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())
dml_iivm.fit()

# fit models with callable scores
# The fitted PLR model's private _score_elements method is reused as the
# callable score of a second PLR model, which is created with
# draw_sample_splitting=False.
plr_score = dml_plr._score_elements
dml_plr_callable_score = DoubleMLPLR(dml_data_plr,
                                     Lasso(),
                                     Lasso(),
                                     score=plr_score,
                                     draw_sample_splitting=False)
def test_doubleml_exception_p_adjust():
    """Check the call-order and argument errors raised by ``p_adjust()``."""
    model = DoubleMLPLR(dml_data, ml_g, ml_m)

    # p_adjust() is not allowed before fit() has been called.
    with pytest.raises(ValueError, match=r'Apply fit\(\) before p_adjust\(\).'):
        model.p_adjust()

    # After fit() the 'romano-wolf' method is still refused until bootstrap() has run.
    model.fit()
    with pytest.raises(ValueError, match=r'Apply fit\(\) & bootstrap\(\) before p_adjust'):
        model.p_adjust(method='romano-wolf')

    model.bootstrap()
    adjusted = model.p_adjust(method='romano-wolf')
    assert isinstance(adjusted, pd.DataFrame)

    # A non-string method is rejected with a TypeError.
    wrong_type = "The p_adjust method must be of str type. 0.05 of type <class 'float'> was passed."
    with pytest.raises(TypeError, match=wrong_type):
        model.p_adjust(method=0.05)
def test_doubleml_exception_smpls():
    """Reading ``smpls`` on a model created with ``draw_sample_splitting=False`` must raise a ValueError."""
    model = DoubleMLPLR(dml_data, ml_g, ml_m, draw_sample_splitting=False)
    expected = ('Sample splitting not specified. '
                r'Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\).')
    with pytest.raises(ValueError, match=expected):
        _ = model.smpls