Example #1
0
def run_experiment(scenario_name, repid):
    """Fit ToyModelSelectionMethod on one Mendelian scenario and save outputs.

    Seeds torch and numpy with a fixed value, makes sure a ``.mat`` export of
    the scenario data exists next to its ``.npz`` file, then runs repetition
    ``repid``: the value returned by ``fit`` is saved as
    ``deepgmm_<rep>_time.npy`` and the test predictions as
    ``deepgmm_<rep>.npz`` under ``<ROOT_PATH>/results/mendelian/<scenario>/``.

    Args:
        scenario_name: Base name (no extension) of the scenario data file
            under ``<ROOT_PATH>/data/mendelian/``.
        repid: Index of the single repetition to run; only used to name the
            output files.
    """
    # fixed seed shared by torch and numpy
    fixed_seed = 527
    torch.manual_seed(fixed_seed)
    np.random.seed(fixed_seed)

    print("\nLoading " + scenario_name + "...")
    data_stem = ROOT_PATH + "/data/mendelian/" + scenario_name
    npz_path = data_stem + '.npz'
    mat_path = data_stem + '.mat'

    if not os.path.exists(mat_path):
        # The export is written from a Torch=False load of the same file.
        raw_train, raw_dev, raw_test = load_data(npz_path, Torch=False)
        export = {
            'X_train': raw_train.x,
            'Y_train': raw_train.y,
            'Z_train': raw_train.z,
            'X_dev': raw_dev.x,
            'Y_dev': raw_dev.y,
            'Z_dev': raw_dev.z,
            'X_test': raw_test.x,
            'g_test': raw_test.g,
        }
        io.savemat(mat_path, mdict=export)

    train, dev, test = load_data(npz_path, Torch=True)

    out_dir = ROOT_PATH + "/results/mendelian/" + scenario_name + "/"
    os.makedirs(out_dir, exist_ok=True)

    # Exactly one pass: rep == repid.
    for rep in range(repid, repid + 1):
        selector = ToyModelSelectionMethod(
            f_input=train.z.shape[1],
            enable_cuda=torch.cuda.is_available())
        fit_time = selector.fit(train.x, train.z, train.y,
                                dev.x, dev.z, dev.y,
                                g_dev=dev.g, verbose=True)
        np.save(out_dir + "deepgmm_{:d}_time.npy".format(rep), fit_time)

        test_pred = selector.predict(test.x)
        squared_error = (test_pred - test.g)**2
        mse = float(squared_error.mean())

        print("--------------- " + str(rep))
        print("MSE on test:", mse)
        print("")
        print("saving results...")
        result_path = os.path.join(out_dir, "deepgmm_{:d}.npz".format(rep))
        np.savez(result_path,
                 x=test.w,
                 y=test.y,
                 g_true=test.g,
                 g_hat=test_pred.detach())
def run_experiment(scenario_name,repid,model_id=None,training=False):
    """Train one DeepGMM repetition on an MNIST scenario, or collect saved MSEs.

    With ``training=True`` only repetition ``repid`` is fitted and its test
    predictions are written to
    ``<ROOT_PATH>/results/mnist/<scenario_name>/deepgmm_<rep>.npz``; nothing
    is fitted when ``repid`` lies outside ``range(10)``.
    With ``training=False`` nothing is fitted: the result file of each of the
    10 repetitions is read back and the mean squared difference between its
    ``g_true`` and ``g_hat`` arrays is collected. Missing files are reported
    on stdout and skipped.

    Args:
        scenario_name: Key into ``SCENARIO_METHOD_CLASSES``; also names the
            data and result directories.
        repid: Repetition index to train (only read when ``training``).
        model_id: Currently unused; ``fit`` is always called with
            ``model_id=None``.
        training: Select training mode (True) or result collection (False).

    Returns:
        list: One MSE per result file found, in repetition order. Always
        empty in training mode.
    """
    # set random seed
    seed = 527
    torch.manual_seed(seed)
    np.random.seed(seed)

    num_reps = 10

    print("\nLoading " + scenario_name + "...")
    train, dev, test = load_data(ROOT_PATH+'/data/'+scenario_name+'/main.npz',Torch=True,verbal=True)

    # Loop-invariant values, computed once.
    method_class = SCENARIO_METHOD_CLASSES[scenario_name]
    folder = ROOT_PATH+"/results/mnist/" + scenario_name + "/"

    means = []
    for rep in range(num_reps):
        # NOTE(review): the method is still constructed on every iteration,
        # exactly as before. If the constructor draws from the seeded RNG,
        # this is what makes repetitions differ -- confirm before hoisting.
        method = method_class(enable_cuda=torch.cuda.is_available())
        save_path = os.path.join(folder, "deepgmm_%d.npz" % rep)

        if not training:
            if os.path.exists(save_path):
                # NpzFile keeps a file handle open; close it deterministically.
                with np.load(save_path) as res:
                    means.append(((res['g_true']-res['g_hat'])**2).mean())
            else:
                print(save_path, ' not exists')
            continue

        # Training mode: act only on the requested repetition.
        if rep < repid:
            continue
        if rep > repid:
            break

        # NOTE(review): model_id is hard-coded to None rather than forwarded
        # from the argument; kept as-is because fit's contract is not visible.
        method.fit(train.x, train.z, train.y, dev.x, dev.z, dev.y,
                   g_dev=dev.g,rep=rep,model_id=None,
                   verbose=True)
        g_pred_test = method.predict(test.x)
        mse = float(((g_pred_test - test.g) ** 2).mean())

        print("---------------")
        print("finished running methodology on scenario ",scenario_name)
        print("MSE on test:", mse)
        print("")
        print("saving results...")
        os.makedirs(folder, exist_ok=True)
        np.savez(save_path, x=test.w, y=test.y, g_true=test.g,
                 g_hat=g_pred_test.detach())
    return means
def run_experiment(scenario_name, repid, datasize):
    """Fit ToyModelSelectionMethod on one zoo scenario and save its outputs.

    Seeds torch and numpy with a fixed value, loads
    ``<ROOT_PATH>/data/zoo/<scenario_name>_<datasize>.npz`` and runs
    repetition ``repid``. Output files live under
    ``<ROOT_PATH>/results/zoo/<scenario_name>/`` and are tagged with the
    repetition index and the number of training rows:
    ``deepgmm_<rep>_<n>_time.npy`` holds the value returned by ``fit`` and
    ``deepgmm_<rep>_<n>.npz`` holds the test predictions.

    Args:
        scenario_name: Scenario prefix of the data file and result folder.
        repid: Index of the single repetition to run.
        datasize: Size tag that selects the data file.
    """
    # fixed seed shared by torch and numpy
    fixed_seed = 527
    torch.manual_seed(fixed_seed)
    np.random.seed(fixed_seed)

    print("\nLoading " + scenario_name + "...")
    data_path = (ROOT_PATH + '/data/zoo/' + scenario_name +
                 '_{}.npz'.format(datasize))
    train, dev, test = load_data(data_path, Torch=True)

    out_dir = ROOT_PATH + "/results/zoo/" + scenario_name + "/"
    os.makedirs(out_dir, exist_ok=True)

    # Exactly one pass: rep == repid.
    for rep in range(repid, repid + 1):
        selector = ToyModelSelectionMethod(
            enable_cuda=torch.cuda.is_available())
        fit_time = selector.fit(train.x, train.z, train.y,
                                dev.x, dev.z, dev.y,
                                g_dev=dev.g, verbose=True)

        # Shared "<rep>_<n_train>" tag for both output files.
        run_tag = "deepgmm_%d_%d" % (rep, train.x.shape[0])
        np.save(out_dir + run_tag + "_time.npy", fit_time)

        test_pred = selector.predict(test.x)
        squared_error = (test_pred - test.g)**2
        mse = float(squared_error.mean())

        print("--------------- " + str(rep))
        print("MSE on test:", mse)
        print("")
        print("saving results...")
        result_path = os.path.join(out_dir, run_tag + ".npz")
        np.savez(result_path,
                 x=test.w,
                 y=test.y,
                 g_true=test.g,
                 g_hat=test_pred.detach())
Example #4
0
def run_experiment(scenario_name,
                   mid,
                   repid,
                   num_reps=10,
                   seed=527,
                   training=False):
    """Train one baseline on an MNIST scenario, or collect saved baseline MSEs.

    The candidate methods are, in order: ``DirectNN``, ``Vanilla2SLS``,
    ``Ridge2SLS``, ``GMM`` and ``DeepIV``. ``DirectNN`` and ``GMM`` are only
    instantiated for the ``mnist_z``, ``mnist_x`` and ``mnist_xz`` scenarios
    and are ``None`` placeholders otherwise.

    With ``training=True`` the method at index ``mid`` is fitted for
    repetition ``repid`` only; the second value returned by ``fit`` and the
    model outputs are written under
    ``<ROOT_PATH>/results/mnist/<scenario_name>/``. A ``None`` placeholder is
    reported on stdout and skipped.
    With ``training=False`` the ``<method>_<rep>.npz`` result of every method
    and repetition is read back. Missing files are reported on stdout, and a
    repetition is only included in the return value when every method has a
    result, so that all returned rows have one entry per method.

    Args:
        scenario_name: Name of the data/result sub-directory.
        mid: Index into the method list (training mode only).
        repid: Repetition index to train (training mode only).
        num_reps: Number of repetitions to iterate over.
        seed: Seed applied to torch, numpy and tensorflow.
        training: Select training mode (True) or result collection (False).

    Returns:
        list: One list per complete repetition holding the mean squared
        difference between ``g_true`` and ``g_hat`` for each method, in
        method order. Always empty in training mode.
    """
    # set random seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    tensorflow.set_random_seed(seed)

    train, dev, test = load_data(ROOT_PATH + '/data/' + scenario_name +
                                 '/main.npz',
                                 verbal=True)
    print(np.mean(train.x, axis=1))

    # Loop-invariant result directory.
    folder = ROOT_PATH + "/results/mnist/" + scenario_name + "/"

    means = []
    for rep in range(num_reps):
        # Not all methods are applicable in all scenarios.
        # NOTE(review): methods are rebuilt on every repetition, in the same
        # order as before, in case construction consumes seeded RNG state.
        poly2sls_method = Poly2SLS(poly_degree=[1],
                                   ridge_alpha=np.logspace(-5, 3, 5))
        direct_method = None
        gmm_method = None
        deep_iv = all_baselines.DeepIV()
        if scenario_name == "mnist_z":
            deep_iv = all_baselines.DeepIV(treatment_model="cnn")
            gmm_method = GMM(g_model="2-layer", n_steps=10, g_epochs=10)
            direct_method = DirectNN()
        elif scenario_name == "mnist_x":
            gmm_method = GMM(g_model="mnist", n_steps=10, g_epochs=1)
            direct_method = DirectMNIST()
        elif scenario_name == "mnist_xz":
            deep_iv = all_baselines.DeepIV(treatment_model="cnn")
            gmm_method = GMM(g_model="mnist", n_steps=10, g_epochs=1)
            direct_method = DirectMNIST()

        methods = [
            ("DirectNN", direct_method),
            ("Vanilla2SLS", Vanilla2SLS()),
            ("Ridge2SLS", poly2sls_method),
            ("GMM", gmm_method),
            ("DeepIV", deep_iv),
        ]

        if training:
            # Act only on the requested repetition.
            if rep < repid:
                continue
            if rep > repid:
                break

            for method_name, method in methods[mid:mid + 1]:
                if method is None:
                    # Placeholder: this method has no model for the scenario.
                    print(method_name + " is not available for scenario " +
                          scenario_name + ", skipping")
                    continue
                print("Running " + method_name)
                model, fit_time = method.fit(train.x, train.y, train.z, None)
                os.makedirs(folder, exist_ok=True)
                np.save(folder + '{}_{}_time.npy'.format(method_name, rep),
                        fit_time)
                file_name = "%s_%d.npz" % (method_name, rep)
                save_path = os.path.join(folder, file_name)
                save_model(model, save_path, test)
                test_mse = eval_model(model, test)
                model_type_name = type(model).__name__
                print("Test MSE of %s: %f" % (model_type_name, test_mse))
        else:
            mean_rep = []
            for method_name, _ in methods:
                file_name = "%s_%d.npz" % (method_name, rep)
                save_path = os.path.join(folder, file_name)
                if os.path.exists(save_path):
                    # NpzFile keeps a file handle open; close it promptly.
                    with np.load(save_path) as res:
                        mean_rep.append(
                            ((res['g_true'] - res['g_hat'])**2).mean())
                else:
                    print(save_path, ' not exists')
            print(mean_rep)
            # Keep only complete rows so columns always line up with methods.
            if len(mean_rep) == len(methods):
                means.append(mean_rep)
    return means
Example #5
0
def run_experiment(scenario_name,
                   mid,
                   repid,
                   datasize,
                   num_reps=10,
                   seed=527,
                   training=False):
    """Train one baseline on a zoo scenario, or collect saved MSEs and times.

    The candidate methods are, in order: ``DirectNN``, ``Vanilla2SLS``,
    ``Poly2SLS``, ``GMM``, ``AGMM`` and ``DeepIV``. All files live under
    ``<ROOT_PATH>/results/zoo/<scenario_name>/`` and are named
    ``<method>_<rep>_<n>`` where ``n`` is the number of training rows
    (``train.x.shape[0]``).

    With ``training=True`` the method at index ``mid`` is fitted for
    repetition ``repid`` only; the second value returned by ``fit`` is saved
    as ``..._time.npy`` and the model outputs as ``....npz``.
    With ``training=False`` the result and time files of every method and
    repetition are read back; missing files are reported on stdout, and a
    repetition only contributes to an output list when every method has the
    corresponding file.

    Args:
        scenario_name: Scenario prefix of the data file and result folder.
        mid: Index into the method list (training mode only).
        repid: Repetition index to train (training mode only).
        datasize: Size tag that selects the data file
            ``<scenario_name>_<datasize>.npz``.
        num_reps: Number of repetitions to iterate over.
        seed: Seed applied to torch, numpy and tensorflow.
        training: Select training mode (True) or result collection (False).

    Returns:
        tuple: ``(means, times)``. ``means`` holds one list of per-method
        MSEs per complete repetition; ``times`` holds one list of per-method
        loaded time arrays per complete repetition. Both are empty in
        training mode.
    """
    # set random seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    tensorflow.set_random_seed(seed)

    train, dev, test = load_data(ROOT_PATH + '/data/zoo/' + scenario_name +
                                 '_{}.npz'.format(datasize))

    # result folder
    folder = ROOT_PATH + "/results/zoo/" + scenario_name + "/"
    os.makedirs(folder, exist_ok=True)

    # Training names its outputs by the number of training rows, so the
    # collection branch must look files up by the same key (it previously
    # used ``datasize`` for the .npz files only).
    n_train = train.x.shape[0]

    means = []
    times = []
    for rep in range(num_reps):
        # NOTE(review): methods are rebuilt on every repetition, as before,
        # in case construction consumes seeded RNG state.
        methods = [
            ("DirectNN", DirectNN()),
            ("Vanilla2SLS", Vanilla2SLS()),
            ("Poly2SLS", Poly2SLS()),
            ("GMM", GMM(g_model="2-layer", n_steps=20)),
            ("AGMM", AGMM()),
            ("DeepIV", DeepIV()),
        ]

        if training:
            # Act only on the requested repetition.
            if rep < repid:
                continue
            if rep > repid:
                break

            for method_name, method in methods[mid:mid + 1]:
                print("Running " + method_name + " " + str(rep))
                stem = "%s_%d_%d" % (method_name, rep, n_train)
                save_path = os.path.join(folder, stem + ".npz")

                model, fit_time = method.fit(train.x, train.y, train.z, None)
                np.save(folder + stem + "_time.npy", fit_time)
                save_model(model, save_path, test)
                test_mse = eval_model(model, test)
                model_type_name = type(model).__name__
                print("Test MSE of %s: %f" % (model_type_name, test_mse))
        else:
            rep_means = []
            rep_times = []
            for method_name, _ in methods:
                stem = "%s_%d_%d" % (method_name, rep, n_train)

                save_path = os.path.join(folder, stem + ".npz")
                if os.path.exists(save_path):
                    # NpzFile keeps a file handle open; close it promptly.
                    with np.load(save_path) as res:
                        mse = float(
                            ((res['g_hat'] - res['g_true'])**2).mean())
                    rep_means.append(mse)
                else:
                    print(save_path, ' not exists')

                time_path = folder + stem + "_time.npy"
                if os.path.exists(time_path):
                    rep_times.append(np.load(time_path))
                else:
                    print(time_path, ' not exists')

            # Keep only complete rows so columns always line up with methods.
            if len(rep_means) == len(methods):
                means.append(rep_means)
            if len(rep_times) == len(methods):
                times.append(rep_times)
    return means, times