예제 #1
0
def test_lalonde_defaults():
    """Calling ``load_lalonde()`` with no arguments must match the explicit call.

    The explicit call passes ``rct_version='dw'``, ``obs_version='psid1'`` and
    ``data_format=NUMPY``; each of the three returned arrays is compared
    element-wise with its counterpart from the no-argument call.
    """
    w_default, t_default, y_default = load_lalonde()
    w_explicit, t_explicit, y_explicit = load_lalonde(
        rct_version='dw',
        obs_version='psid1',
        data_format=NUMPY,
    )
    array_pairs = (
        (w_default, w_explicit),
        (t_default, t_explicit),
        (y_default, y_explicit),
    )
    for from_defaults, from_explicit in array_pairs:
        np.testing.assert_array_equal(from_defaults, from_explicit)
예제 #2
0
def test_lalonde_original_cps1():
    """Check shapes and the mean-outcome gap for ``rct_version='lalonde'``, ``obs_version='cps1'``."""
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  obs_version='cps1')
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 15992 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == -8870
예제 #3
0
def test_lalonde_original_psid1():
    """Check shapes and the mean-outcome gap for ``rct_version='lalonde'``, ``obs_version='psid1'``."""
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  obs_version='psid1')
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 2490 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == -15578
예제 #4
0
def test_lalonde_cps1():
    """Check shapes and the mean-outcome gap for ``rct_version='dw'``, ``obs_version='cps1'``."""
    covariates, treatment, outcome = load_lalonde(rct_version='dw',
                                                  obs_version='cps1')
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 15992 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == -8498
예제 #5
0
def test_lalonde_psid1():
    """Check shapes and the mean-outcome gap for ``rct_version='dw'``, ``obs_version='psid1'``."""
    covariates, treatment, outcome = load_lalonde(rct_version='dw',
                                                  obs_version='psid1')
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 2490 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == -15205
예제 #6
0
def test_lalonde_original_rct():
    """Check shapes and the mean-outcome gap for ``rct_version='lalonde'`` with ``rct=True``."""
    covariates, treatment, outcome = load_lalonde(rct_version='lalonde',
                                                  rct=True)
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 425 + 297
    assert covariates.shape == (expected_rows, 7)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == 886
예제 #7
0
def test_lalonde_dw_rct():
    """Check shapes and the mean-outcome gap for ``rct_version='dw'`` with ``rct=True``."""
    covariates, treatment, outcome = load_lalonde(rct_version='dw', rct=True)
    # Expected number of rows; the two addends are presumably the sizes of
    # the two groups that get combined -- confirm against the loader.
    expected_rows = 260 + 185
    assert covariates.shape == (expected_rows, 8)
    assert treatment.shape == (expected_rows, )
    assert outcome.shape == (expected_rows, )
    # Difference between the mean outcome where t == 1 and where t == 0.
    mean_treated = outcome[treatment == 1].mean()
    mean_control = outcome[treatment == 0].mean()
    assert round(mean_treated - mean_control) == 1794
예제 #8
0
def get_data(args):
    """Load the dataset named by ``args.data`` and return it as one tuple.

    The name is lower-cased before matching. Recognised names:

    * ``lalonde``, ``lalonde_psid``, ``lalonde_psid1`` --
      ``load_lalonde(obs_version="psid")``
    * ``lalonde_rct`` -- ``load_lalonde(rct=True)``
    * ``lalonde_cps``, ``lalonde_cps1`` -- ``load_lalonde(obs_version="cps")``
    * ``lbidd_<link>_<n>`` and ``lbidd_<link>_<n>_counterfactual`` --
      ``load_lbidd``
    * ``ihdp``, ``ihdp_counterfactual`` -- ``load_ihdp``
    * ``twins`` -- ``load_twins``

    :param args: object exposing ``data`` (the dataset name) and ``dataroot``
        (forwarded to the loaders that are called with it)
    :return: tuple ``(ites, ate, w, t, y)``; ``ites`` and ``ate`` stay
        ``None`` for every branch that does not request them from its loader
        (only the ``lbidd`` and plain ``ihdp`` branches do)
    :raises ValueError: if the name is not recognised, or if an ``lbidd``
        name lacks the ``<link>`` or ``<n>`` component
    """
    data_name = args.data.lower()
    ate = None
    ites = None
    if data_name in ("lalonde", "lalonde_psid", "lalonde_psid1"):
        w, t, y = load_lalonde(obs_version="psid", dataroot=args.dataroot)
    elif data_name == "lalonde_rct":
        w, t, y = load_lalonde(rct=True, dataroot=args.dataroot)
    elif data_name in ("lalonde_cps", "lalonde_cps1"):
        w, t, y = load_lalonde(obs_version="cps", dataroot=args.dataroot)
    elif data_name.startswith("lbidd"):
        # Valid string formats: lbidd_<link>_<n> and lbidd_<link>_<n>_counterfactual
        # Valid <link> options: linear, quadratic, cubic, exp, and log
        # Valid <n> options: 1k, 2.5k, 5k, 10k, 25k, and 50k
        options = data_name.split("_")
        if len(options) < 3:
            # Fail with an explanatory message instead of a bare IndexError
            # when the <link> or <n> component is missing.
            raise ValueError(
                "lbidd dataset name {!r} must look like lbidd_<link>_<n> "
                "or lbidd_<link>_<n>_counterfactual".format(args.data))
        link, n = options[1], options[2]
        observe_counterfactuals = (len(options) == 4
                                   and options[3] == "counterfactual")
        d = load_lbidd(n=n,
                       observe_counterfactuals=observe_counterfactuals,
                       link=link,
                       dataroot=args.dataroot,
                       return_ate=True,
                       return_ites=True)
        ate = d["ate"]
        ites = d['ites']
        if observe_counterfactuals:
            w, t, y = (d["obs_counterfactual_w"],
                       d["obs_counterfactual_t"],
                       d["obs_counterfactual_y"])
        else:
            w, t, y = d["w"], d["t"], d["y"]
    elif data_name == "ihdp":
        # NOTE(review): unlike the other loaders, load_ihdp is called without
        # dataroot here -- confirm whether that is intentional.
        d = load_ihdp(return_ate=True, return_ites=True)
        w, t, y, ate, ites = d["w"], d["t"], d["y"], d['ate'], d['ites']
    elif data_name == "ihdp_counterfactual":
        d = load_ihdp(observe_counterfactuals=True)
        w, t, y = d["w"], d["t"], d["y"]
    elif data_name == "twins":
        d = load_twins(dataroot=args.dataroot)
        w, t, y = d["w"], d["t"], d["y"]
    else:
        # ValueError subclasses Exception, so existing `except Exception`
        # handlers keep working; the message text is unchanged.
        raise ValueError("dataset {} not implemented".format(args.data))

    return ites, ate, w, t, y
예제 #9
0
        loss_y = self.outcome_distribution.loss(y, y_)
        loss = loss_t + loss_y
        return loss, loss_t, loss_y


if __name__ == "__main__":
    from data.lalonde import load_lalonde
    import matplotlib.pyplot as plt
    import pprint

    pp = pprint.PrettyPrinter(indent=4)

    dataset = 1
    network_params = _DEFAULT_TARNET.copy()
    if dataset == 1:
        w, t, y = load_lalonde()
        dist = distributions.MixedDistribution([0.0], distributions.LogLogistic())
        training_params = TrainingParams(lr=0.0005, batch_size=128, num_epochs=100, verbose=False)
        early_stop = True
        ignore_w = False
    elif dataset == 2:
        w, t, y = load_lalonde(rct=True)
        # dist = distributions.MixedDistribution([0.0], distributions.LogLogistic())
        dist = distributions.FactorialGaussian()
        training_params = TrainingParams(lr=0.001, batch_size=64, num_epochs=200)
        early_stop = True
        ignore_w = False
    elif dataset == 3:
        w, t, y = load_lalonde(obs_version="cps1")
        dist = distributions.MixedDistribution(
            [0.0, 25564.669921875], distributions.SigmoidFlow(ndim=10)
예제 #10
0
def test_lalonde_torch():
    """With ``data_format=TORCH`` all three returned objects must be ``torch.Tensor``."""
    covariates, treatment, outcome = load_lalonde(data_format=TORCH)
    for returned in (covariates, treatment, outcome):
        assert isinstance(returned, torch.Tensor)
예제 #11
0
def test_lalonde_pandas_single():
    """With ``data_format=PANDAS_SINGLE`` the loader must return one ``pd.DataFrame``."""
    frame = load_lalonde(data_format=PANDAS_SINGLE)
    is_dataframe = isinstance(frame, pd.DataFrame)
    assert is_dataframe
예제 #12
0
def test_lalonde_pandas():
    """With ``data_format=PANDAS`` expect a ``DataFrame`` for ``w`` and a ``Series`` for ``t`` and ``y``."""
    covariates, treatment, outcome = load_lalonde(data_format=PANDAS)
    assert isinstance(covariates, pd.DataFrame)
    assert isinstance(treatment, pd.Series)
    assert isinstance(outcome, pd.Series)