def test_lalonde_defaults():
    """Calling ``load_lalonde()`` bare must match the explicit default arguments.

    The explicit call spells out ``rct_version='dw'``, ``obs_version='psid1'``
    and ``data_format=NUMPY``; all three returned arrays must be equal
    element-wise to those from the argument-free call.
    """
    w_implicit, t_implicit, y_implicit = load_lalonde()
    w_explicit, t_explicit, y_explicit = load_lalonde(
        rct_version='dw', obs_version='psid1', data_format=NUMPY)

    # Compare in the same order as the returned triple: w, then t, then y.
    pairs = (
        (w_implicit, w_explicit),
        (t_implicit, t_explicit),
        (y_implicit, y_explicit),
    )
    for implicit_arr, explicit_arr in pairs:
        np.testing.assert_array_equal(implicit_arr, explicit_arr)
def test_lalonde_original_cps1():
    """Check shapes and the naive mean difference for 'lalonde' + 'cps1'.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  obs_version='cps1')

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 15992 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == -8870
def test_lalonde_original_psid1():
    """Check shapes and the naive mean difference for 'lalonde' + 'psid1'.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  obs_version='psid1')

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 2490 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == -15578
def test_lalonde_cps1():
    """Check shapes and the naive mean difference for 'dw' + 'cps1'.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='dw',
                                                  obs_version='cps1')

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 15992 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == -8498
def test_lalonde_psid1():
    """Check shapes and the naive mean difference for 'dw' + 'psid1'.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='dw',
                                                  obs_version='psid1')

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 2490 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == -15205
def test_lalonde_original_rct():
    """Check shapes and the naive mean difference for 'lalonde' with rct=True.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  rct=True)

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 425 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == 886
def test_lalonde_dw_rct():
    """Check shapes and the naive mean difference for 'dw' with rct=True.

    The difference is mean(y | t == 1) - mean(y | t == 0), rounded to the
    nearest integer.
    """
    covariates, treatment, outcome = load_lalonde(rct_version='dw', rct=True)

    # presumably control rows + treated rows -- TODO confirm against the loader
    expected_rows = 260 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )

    treated_mean = outcome[treatment == 1].mean()
    control_mean = outcome[treatment == 0].mean()
    naive_diff = treated_mean - control_mean
    assert round(naive_diff) == 1794
def get_data(args):
    """Load the dataset named by ``args.data``.

    The name is lower-cased before matching. Recognised names are
    ``lalonde`` / ``lalonde_psid`` / ``lalonde_psid1``, ``lalonde_rct``,
    ``lalonde_cps`` / ``lalonde_cps1``, ``lbidd_<link>_<n>`` (optionally with a
    trailing ``_counterfactual``), ``ihdp``, ``ihdp_counterfactual`` and
    ``twins``.

    :param args: object exposing ``data`` (the dataset name) and ``dataroot``
        (forwarded to the loaders that accept it)
    :return: tuple ``(ites, ate, w, t, y)``; ``ites`` and ``ate`` stay ``None``
        for every dataset except ``lbidd_*`` and ``ihdp``
    :raises ValueError: if the name is not recognised, or an ``lbidd`` name
        lacks the ``<link>`` and ``<n>`` parts. ``ValueError`` subclasses
        ``Exception``, so existing ``except Exception`` handlers still work.
    """
    data_name = args.data.lower()
    ate = None
    ites = None

    # NOTE(review): other callers in this file pass obs_version='psid1'/'cps1';
    # confirm load_lalonde also accepts the short 'psid'/'cps' spelling used here.
    if data_name in ("lalonde", "lalonde_psid", "lalonde_psid1"):
        w, t, y = load_lalonde(obs_version="psid", dataroot=args.dataroot)
    elif data_name == "lalonde_rct":
        w, t, y = load_lalonde(rct=True, dataroot=args.dataroot)
    elif data_name in ("lalonde_cps", "lalonde_cps1"):
        w, t, y = load_lalonde(obs_version="cps", dataroot=args.dataroot)
    elif data_name.startswith("lbidd"):
        # Valid string formats: lbidd_<link>_<n> and lbidd_<link>_<n>_counterfactual
        # Valid <link> options: linear, quadratic, cubic, exp, and log
        # Valid <n> options: 1k, 2.5k, 5k, 10k, 25k, and 50k
        options = data_name.split("_")
        if len(options) < 3:
            # Without this guard the indexing below fails with a bare IndexError
            # that gives no hint about the expected name format.
            raise ValueError(
                "dataset {} is not a valid lbidd name; expected "
                "lbidd_<link>_<n> or lbidd_<link>_<n>_counterfactual".format(
                    args.data))
        link = options[1]
        n = options[2]
        observe_counterfactuals = (len(options) == 4) and (options[3] == "counterfactual")
        d = load_lbidd(n=n, observe_counterfactuals=observe_counterfactuals,
                       link=link, dataroot=args.dataroot,
                       return_ate=True, return_ites=True)
        ate = d["ate"]
        ites = d['ites']
        if observe_counterfactuals:
            w = d["obs_counterfactual_w"]
            t = d["obs_counterfactual_t"]
            y = d["obs_counterfactual_y"]
        else:
            w, t, y = d["w"], d["t"], d["y"]
    elif data_name == "ihdp":
        d = load_ihdp(return_ate=True, return_ites=True)
        w, t, y, ate, ites = d["w"], d["t"], d["y"], d['ate'], d['ites']
    elif data_name == "ihdp_counterfactual":
        d = load_ihdp(observe_counterfactuals=True)
        w, t, y = d["w"], d["t"], d["y"]
    elif data_name == "twins":
        d = load_twins(dataroot=args.dataroot)
        w, t, y = d["w"], d["t"], d["y"]
    else:
        raise ValueError("dataset {} not implemented".format(args.data))

    return ites, ate, w, t, y
loss_y = self.outcome_distribution.loss(y, y_) loss = loss_t + loss_y return loss, loss_t, loss_y if __name__ == "__main__": from data.lalonde import load_lalonde import matplotlib.pyplot as plt import pprint pp = pprint.PrettyPrinter(indent=4) dataset = 1 network_params = _DEFAULT_TARNET.copy() if dataset == 1: w, t, y = load_lalonde() dist = distributions.MixedDistribution([0.0], distributions.LogLogistic()) training_params = TrainingParams(lr=0.0005, batch_size=128, num_epochs=100, verbose=False) early_stop = True ignore_w = False elif dataset == 2: w, t, y = load_lalonde(rct=True) # dist = distributions.MixedDistribution([0.0], distributions.LogLogistic()) dist = distributions.FactorialGaussian() training_params = TrainingParams(lr=0.001, batch_size=64, num_epochs=200) early_stop = True ignore_w = False elif dataset == 3: w, t, y = load_lalonde(obs_version="cps1") dist = distributions.MixedDistribution( [0.0, 25564.669921875], distributions.SigmoidFlow(ndim=10)
def test_lalonde_torch():
    """With ``data_format=TORCH`` every returned item must be a ``torch.Tensor``."""
    w, t, y = load_lalonde(data_format=TORCH)
    for item in (w, t, y):
        assert isinstance(item, torch.Tensor)
def test_lalonde_pandas_single():
    """With ``data_format=PANDAS_SINGLE`` the loader must return one DataFrame."""
    frame = load_lalonde(data_format=PANDAS_SINGLE)
    assert isinstance(frame, pd.DataFrame)
def test_lalonde_pandas():
    """With ``data_format=PANDAS``: ``w`` is a DataFrame, ``t`` and ``y`` are Series."""
    covariates, treatment, outcome = load_lalonde(data_format=PANDAS)
    # Checked in the original short-circuit order: w first, then t, then y.
    assert isinstance(covariates, pd.DataFrame)
    assert isinstance(treatment, pd.Series)
    assert isinstance(outcome, pd.Series)