Esempio n. 1
0
def test_make_irm_data_return_types():
    """Check the object type produced by ``make_irm_data`` per ``return_type``.

    ``'DoubleMLData'`` must yield a ``DoubleMLData`` instance, ``'DataFrame'``
    a pandas ``DataFrame`` and ``'array'`` three numpy arrays.  An unknown
    value (``'matrix'``) must raise a ``ValueError`` whose message matches
    ``msg_inv_return_type``.
    """
    np.random.seed(3141)

    dml_obj = make_irm_data(n_obs=100, return_type='DoubleMLData')
    assert isinstance(dml_obj, DoubleMLData)

    frame = make_irm_data(n_obs=100, return_type='DataFrame')
    assert isinstance(frame, pd.DataFrame)

    # the 'array' variant is unpacked into exactly three objects
    x, y, d = make_irm_data(n_obs=100, return_type='array')
    for arr in (x, y, d):
        assert isinstance(arr, np.ndarray)

    # an unsupported return type has to be rejected
    with pytest.raises(ValueError, match=msg_inv_return_type):
        make_irm_data(n_obs=100, return_type='matrix')
Esempio n. 2
0
def generate_data_irm(request):
    """Build one IRM data set in array form for the current parametrisation.

    ``request.param`` is indexed at positions 0 and 1; those two values are
    forwarded as the first and second positional arguments of
    ``make_irm_data`` (presumably sample size and covariate dimension --
    confirm against ``make_irm_data``).  The third positional argument is
    fixed at 0.5.

    Returns whatever ``make_irm_data(..., return_type='array')`` returns.
    """
    sizes = request.param
    # fixed seed so the generated data are reproducible
    np.random.seed(1111)

    # setting parameters
    n_val, p_val = sizes[0], sizes[1]
    theta = 0.5

    # generating data
    return make_irm_data(n_val, p_val, theta, return_type='array')
Esempio n. 3
0
def generate_data_irm(request):
    """Build a list of IRM data sets in array form for the current parametrisation.

    ``request.param`` is indexed at positions 0 and 1; those two values are
    forwarded as the first and second positional arguments of
    ``make_irm_data`` (presumably sample size and covariate dimension --
    confirm against ``make_irm_data``).  The third positional argument is
    fixed at 0.5.

    Returns a list holding ``n_datasets`` results of
    ``make_irm_data(..., return_type='array')``, generated one after another
    from a single seeded RNG stream.  ``n_datasets`` is read from the
    enclosing module.
    """
    sizes = request.param
    # seed once; the successive draws below then differ from each other
    np.random.seed(1111)

    # setting parameters
    n_val, p_val = sizes[0], sizes[1]
    theta = 0.5

    # generating data
    return [
        make_irm_data(n_val, p_val, theta, return_type='array')
        for _ in range(n_datasets)
    ]
Esempio n. 4
0
def generate_data_irm_w_missings(request):
    """Build one IRM data set in array form with some entries of ``x`` missing.

    ``request.param`` is indexed at positions 0 and 1; those two values are
    forwarded as the first and second positional arguments of
    ``make_irm_data`` (presumably sample size and covariate dimension --
    confirm against ``make_irm_data``).  The third positional argument is
    fixed at 0.5.

    After generation, ``int(x.size * 0.05)`` distinct entries of ``x`` are
    chosen at random and overwritten with ``np.nan`` in place.

    Returns the tuple ``(x, y, d)``.
    """
    sizes = request.param
    # fixed seed: covers both the data generation and the choice of missing cells
    np.random.seed(1111)

    # setting parameters
    n_val, p_val = sizes[0], sizes[1]
    theta = 0.5

    # generating data
    x, y, d = make_irm_data(n_val, p_val, theta, return_type='array')

    # randomly set some entries to np.nan
    n_missing = int(x.size * 0.05)
    # draw flat positions without replacement, then map them back to x's shape
    flat_positions = np.random.choice(np.arange(x.size), size=n_missing,
                                      replace=False)
    cell_index = np.unravel_index(flat_positions, x.shape)
    x[cell_index] = np.nan

    return (x, y, d)
from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\
    make_pliv_multiway_cluster_CKMS2021

from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.base import BaseEstimator

# Module-level setup: the statements below run once at import time.
# Seed numpy's global RNG before the data sets below are generated.
np.random.seed(3141)
# Small PLR data set (10 observations) and Lasso learners; ml_g and ml_m are
# used to construct the DoubleMLPLR object below.
dml_data = make_plr_CCDDHNR2018(n_obs=10)
ml_g = Lasso()
ml_m = Lasso()
# NOTE(review): ml_r is not used in the lines visible here; presumably it is
# consumed elsewhere in the file -- confirm before removing.
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m)

# Further small data sets, one per generator function.
dml_data_irm = make_irm_data(n_obs=10)
dml_data_iivm = make_iivm_data(n_obs=10)
dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1)
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
# Arrays from the IIVM generator (30 observations); y is binarised in place below.
(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array")
# Positive values become 1 and negative values become 0; exact zeros are left unchanged.
y[y > 0] = 1
y[y < 0] = 0
# The same arrays back two data objects: one built from (x, y, d) only and
# one that additionally passes z as the fourth argument.
dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d)
dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z)


@pytest.mark.ci
def test_doubleml_exception_data():
    """Constructing ``DoubleMLPLR`` from a plain pandas DataFrame must raise ``TypeError``."""
    # expected error message; pytest.raises uses it as a regex against the raised message
    msg = 'The data must be of DoubleMLData type.'
    with pytest.raises(TypeError, match=msg):
        # an empty DataFrame is passed where the data object is expected
        _ = DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)