def test_make_irm_data_return_types():
    """Check the object type produced by ``make_irm_data`` per ``return_type``.

    Covers the ``'DoubleMLData'``, ``'DataFrame'`` and ``'array'`` return
    types, and asserts that the unsupported value ``'matrix'`` raises a
    ``ValueError`` matching ``msg_inv_return_type``.
    """
    np.random.seed(3141)

    # Return types that yield a single object, with the class expected back.
    single_object_cases = (
        ('DoubleMLData', DoubleMLData),
        ('DataFrame', pd.DataFrame),
    )
    for requested_type, expected_cls in single_object_cases:
        generated = make_irm_data(n_obs=100, return_type=requested_type)
        assert isinstance(generated, expected_cls)

    # The 'array' return type is unpacked into exactly three arrays.
    x, y, d = make_irm_data(n_obs=100, return_type='array')
    for component in (x, y, d):
        assert isinstance(component, np.ndarray)

    # Any unsupported return type must be rejected.
    with pytest.raises(ValueError, match=msg_inv_return_type):
        make_irm_data(n_obs=100, return_type='matrix')
def generate_data_irm(request):
    """Generate one IRM dataset as arrays for the requested size.

    ``request.param`` is indexed as ``(n, p)`` and both values are passed
    positionally to ``make_irm_data`` together with ``theta = 0.5``.
    Presumably used as a parametrized pytest fixture -- the decorator is not
    visible here, TODO confirm.

    Returns whatever ``make_irm_data(..., return_type='array')`` returns.
    """
    size_spec = request.param
    # Fixed seed so the generated data is the same on every run.
    np.random.seed(1111)

    n_obs, n_covariates = size_spec[0], size_spec[1]
    treatment_effect = 0.5

    return make_irm_data(n_obs, n_covariates, treatment_effect,
                         return_type='array')
def generate_data_irm(request):
    """Generate ``n_datasets`` IRM datasets as arrays for the requested size.

    ``request.param`` is indexed as ``(N, p)``; ``n_datasets`` is read from
    the enclosing module. The seed is set once, so the datasets are drawn
    one after another from the same random stream.
    Presumably used as a parametrized pytest fixture -- the decorator is not
    visible here, TODO confirm.

    Returns a list with one ``make_irm_data(..., return_type='array')``
    result per dataset.
    """
    size_spec = request.param
    # Fixed seed so the whole sequence of datasets is reproducible.
    np.random.seed(1111)

    n_obs, n_covariates = size_spec[0], size_spec[1]
    treatment_effect = 0.5

    return [
        make_irm_data(n_obs, n_covariates, treatment_effect,
                      return_type='array')
        for _ in range(n_datasets)
    ]
def generate_data_irm_w_missings(request):
    """Generate one IRM dataset whose ``x`` array contains some ``np.nan``.

    ``request.param`` is indexed as ``(n, p)``. After the data is generated,
    ``int(x.size * 0.05)`` distinct entries of ``x`` are chosen at random and
    overwritten with ``np.nan``; ``y`` and ``d`` are left untouched.
    Presumably used as a parametrized pytest fixture -- the decorator is not
    visible here, TODO confirm.

    Returns the tuple ``(x, y, d)``.
    """
    size_spec = request.param
    # Fixed seed: it drives both the data generation and the choice of which
    # entries are blanked, so the order of the calls below matters.
    np.random.seed(1111)

    n_obs, n_covariates = size_spec[0], size_spec[1]
    treatment_effect = 0.5

    x, y, d = make_irm_data(n_obs, n_covariates, treatment_effect,
                            return_type='array')

    # Pick flat positions without replacement, then map them back to
    # coordinates in x before overwriting.
    n_missing = int(x.size * 0.05)
    flat_positions = np.random.choice(np.arange(x.size), replace=False,
                                      size=n_missing)
    missing_coords = np.unravel_index(flat_positions, x.shape)
    x[missing_coords] = np.nan

    return (x, y, d)
from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\
    make_pliv_multiway_cluster_CKMS2021

from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.base import BaseEstimator

# Module-level objects built once at import time. The seed is set first and
# the generators are then called in a fixed order, so the statements below
# should not be reordered.
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=10)
# Three separate Lasso instances; only ml_g and ml_m are used in the code
# visible here.
ml_g = Lasso()
ml_m = Lasso()
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m)
# Small datasets (10 observations each) from the other generators.
dml_data_irm = make_irm_data(n_obs=10)
dml_data_iivm = make_iivm_data(n_obs=10)
dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1)
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)

# Build binary-outcome variants: y is overwritten in place so that positive
# values become 1 and negative values become 0.
(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array")
y[y > 0] = 1
y[y < 0] = 0
dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d)
dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z)


@pytest.mark.ci
def test_doubleml_exception_data():
    """Check that ``DoubleMLPLR`` rejects data that is not ``DoubleMLData``.

    Passing an empty ``pandas.DataFrame`` as the data argument is expected to
    raise a ``TypeError`` whose message matches ``msg``.
    """
    # NOTE: pytest uses ``match`` as a regular expression, so the trailing '.'
    # matches any character rather than a literal full stop.
    msg = 'The data must be of DoubleMLData type.'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)