def run_experiment(scenario_name, repid):
    """Fit and evaluate ``ToyModelSelectionMethod`` on one Mendelian scenario.

    Steps, in order:

    1. Seed torch and numpy with the fixed value 527.
    2. If ``<scenario_name>.mat`` is missing under
       ``ROOT_PATH/data/mendelian/``, load the ``.npz`` file with
       ``Torch=False`` and export the train/dev/test arrays with
       ``io.savemat``.
    3. Reload the ``.npz`` file with ``Torch=True``, fit the method, and
       write the value returned by ``fit`` plus the test predictions under
       ``ROOT_PATH/results/mendelian/<scenario_name>/``.

    Args:
        scenario_name: base name (no extension) of the data file.
        repid: replicate index; it only appears in the output file names.

    Returns:
        None. Results are written to disk and the test MSE is printed.
    """
    # set random seed
    seed = 527
    torch.manual_seed(seed)
    np.random.seed(seed)

    print("\nLoading " + scenario_name + "...")
    data_prefix = ROOT_PATH + "/data/mendelian/" + scenario_name
    matname = data_prefix + '.mat'
    if not os.path.exists(matname):
        # set load_data(Torch=False)
        train, dev, test = load_data(data_prefix + '.npz', Torch=False)
        mat_contents = {
            'X_train': train.x,
            'Y_train': train.y,
            'Z_train': train.z,
            'X_dev': dev.x,
            'Y_dev': dev.y,
            'Z_dev': dev.z,
            'X_test': test.x,
            'g_test': test.g,
        }
        io.savemat(matname, mdict=mat_contents)

    # The fit below always uses the Torch=True version of the data.
    train, dev, test = load_data(data_prefix + '.npz', Torch=True)

    folder = ROOT_PATH + "/results/mendelian/" + scenario_name + "/"
    os.makedirs(folder, exist_ok=True)

    # The range holds exactly one value, ``repid``.
    for rep in range(repid, repid + 1):
        use_cuda = torch.cuda.is_available()
        method = ToyModelSelectionMethod(f_input=train.z.shape[1],
                                         enable_cuda=use_cuda)
        fit_time = method.fit(train.x, train.z, train.y,
                              dev.x, dev.z, dev.y,
                              g_dev=dev.g, verbose=True)
        np.save(folder + "deepgmm_%d_time.npy" % (rep), fit_time)

        g_pred_test = method.predict(test.x)
        residual = g_pred_test - test.g
        mse = float((residual ** 2).mean())

        print("--------------- " + str(rep))
        print("MSE on test:", mse)
        print("")
        print("saving results...")

        save_path = os.path.join(folder, "deepgmm_%d.npz" % (rep))
        # Note: the archive key ``x`` stores ``test.w``, not ``test.x``.
        np.savez(save_path,
                 x=test.w,
                 y=test.y,
                 g_true=test.g,
                 g_hat=g_pred_test.detach())
def run_experiment(scenario_name,repid,model_id=None,training=False):
    """Train one DeepGMM replicate on an MNIST scenario, or collect saved MSEs.

    Torch and numpy are seeded with 527 and the data is loaded from
    ``ROOT_PATH/data/<scenario_name>/main.npz``. The function then loops
    over 10 replicate indices.

    * ``training=True``: only the replicate whose index equals ``repid`` is
      fitted. Its test predictions are saved to
      ``ROOT_PATH/results/mnist/<scenario_name>/deepgmm_<rep>.npz``.
    * ``training=False``: for every replicate whose result file exists, the
      mean squared difference between the stored ``g_true`` and ``g_hat``
      is appended to the returned list; missing files are reported on
      stdout.

    Args:
        scenario_name: key into ``SCENARIO_METHOD_CLASSES`` and name of the
            data/result sub-folder.
        repid: replicate index to train (only used when ``training``).
        model_id: forwarded to ``method.fit`` (previously this argument was
            accepted but a literal ``None`` was passed instead).
        training: selects between the fit path and the collect path.

    Returns:
        list: per-replicate MSE values; always empty when ``training`` is
        true.
    """
    # set random seed
    seed = 527
    torch.manual_seed(seed)
    np.random.seed(seed)

    num_reps = 10

    print("\nLoading " + scenario_name + "...")
    train, dev, test = load_data(ROOT_PATH+'/data/'+scenario_name+'/main.npz',Torch=True,verbal=True)

    # Same folder for every replicate, so build the string once.
    folder = ROOT_PATH+"/results/mnist/" + scenario_name + "/"

    means = []
    for rep in range(num_reps):
        # NOTE(review): the method is still built before the skip check, as
        # in the original. If the constructor draws from the torch RNG,
        # moving it would change the initialisation seen by later
        # replicates -- confirm before hoisting.
        method_class = SCENARIO_METHOD_CLASSES[scenario_name]
        method = method_class(enable_cuda=torch.cuda.is_available())

        save_path = os.path.join(folder, "deepgmm_%d.npz" % rep)

        if training:
            # Only the replicate requested by the caller is trained.
            if rep < repid:
                continue
            if rep > repid:
                break

            method.fit(train.x, train.z, train.y, dev.x, dev.z, dev.y,
                       g_dev=dev.g, rep=rep, model_id=model_id, verbose=True)
            g_pred_test = method.predict(test.x)
            mse = float(((g_pred_test - test.g) ** 2).mean())

            print("---------------")
            print("finished running methodology on scenario ",scenario_name)
            print("MSE on test:", mse)
            print("")
            print("saving results...")
            os.makedirs(folder, exist_ok=True)
            # Note: the archive key ``x`` stores ``test.w``, not ``test.x``.
            np.savez(save_path, x=test.w, y=test.y, g_true=test.g,
                     g_hat=g_pred_test.detach())
        else:
            if os.path.exists(save_path):
                # NpzFile keeps the archive open until closed; the context
                # manager releases the file handle deterministically.
                with np.load(save_path) as res:
                    means += [((res['g_true']-res['g_hat'])**2).mean()]
            else:
                print(save_path, ' not exists')
    return means
def run_experiment(scenario_name, repid, datasize):
    """Fit and evaluate ``ToyModelSelectionMethod`` on one zoo scenario.

    Seeds torch and numpy with 527, loads
    ``ROOT_PATH/data/zoo/<scenario_name>_<datasize>.npz`` with
    ``Torch=True``, fits the method for the single replicate ``repid`` and
    writes the value returned by ``fit`` plus the test predictions under
    ``ROOT_PATH/results/zoo/<scenario_name>/``. Output file names carry the
    replicate index and ``train.x.shape[0]``.

    Args:
        scenario_name: scenario part of the data file name and result folder.
        repid: replicate index; it only appears in the output file names.
        datasize: value formatted into the data file name.

    Returns:
        None. Results are written to disk and the test MSE is printed.
    """
    # set random seed
    seed = 527
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Not read anywhere in this function.
    num_reps = 10

    print("\nLoading " + scenario_name + "...")
    data_path = ROOT_PATH + '/data/zoo/' + scenario_name + '_{}.npz'.format(datasize)
    train, dev, test = load_data(data_path, Torch=True)

    folder = ROOT_PATH + "/results/zoo/" + scenario_name + "/"
    os.makedirs(folder, exist_ok=True)

    # The range holds exactly one value, ``repid``.
    for rep in range(repid, repid + 1):
        use_cuda = torch.cuda.is_available()
        method = ToyModelSelectionMethod(enable_cuda=use_cuda)
        fit_time = method.fit(train.x, train.z, train.y,
                              dev.x, dev.z, dev.y,
                              g_dev=dev.g, verbose=True)
        time_file = "deepgmm_%d_%d_time.npy" % (rep, train.x.shape[0])
        np.save(folder + time_file, fit_time)

        g_pred_test = method.predict(test.x)
        residual = g_pred_test - test.g
        mse = float((residual ** 2).mean())

        print("--------------- " + str(rep))
        print("MSE on test:", mse)
        print("")
        print("saving results...")

        result_file = "deepgmm_%d_%d.npz" % (rep, train.x.shape[0])
        save_path = os.path.join(folder, result_file)
        # Note: the archive key ``x`` stores ``test.w``, not ``test.x``.
        np.savez(save_path,
                 x=test.w,
                 y=test.y,
                 g_true=test.g,
                 g_hat=g_pred_test.detach())
def run_experiment(scenario_name, mid, repid, num_reps=10, seed=527, training=False):
    """Train one baseline on an MNIST scenario, or collect saved MSEs.

    Torch, numpy and tensorflow are seeded with ``seed`` and the data is
    loaded from ``ROOT_PATH/data/<scenario_name>/main.npz``. For every
    replicate index the list of ``(name, method)`` baselines is rebuilt in
    the fixed order DirectNN, Vanilla2SLS, Ridge2SLS, GMM, DeepIV.

    * ``training=True``: only replicate ``repid`` is processed, and within
      it only the method at position ``mid``. The fitted model and the
      second value returned by ``fit`` are saved under
      ``ROOT_PATH/results/mnist/<scenario_name>/``.
    * ``training=False``: for every replicate and every method the stored
      ``g_true``/``g_hat`` arrays are loaded and their mean squared
      difference is recorded.

    For scenario names other than ``mnist_z``, ``mnist_x`` and ``mnist_xz``
    the ``DirectNN`` and ``GMM`` entries hold ``None``.

    Args:
        scenario_name: data/result sub-folder name; also selects the
            scenario-specific model configuration.
        mid: index into the method list (training mode only).
        repid: replicate index to train (training mode only).
        num_reps: number of replicate indices to iterate over.
        seed: seed passed to torch, numpy and tensorflow.
        training: selects between the fit path and the collect path.

    Returns:
        list[list]: one inner list of per-method MSEs per replicate; empty
        when ``training`` is true.

    Raises:
        Whatever ``np.load`` raises for a missing result file -- the
        collect path performs no existence check.
    """
    # set random seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    tensorflow.set_random_seed(seed)

    train, dev, test = load_data(ROOT_PATH + '/data/' + scenario_name + '/main.npz', verbal=True)
    print(np.mean(train.x, axis=1))

    # Same folder for every replicate and method, so build the string once.
    folder = ROOT_PATH + "/results/mnist/" + scenario_name + "/"

    means = []
    for rep in range(num_reps):
        # Not all methods are applicable in all scenarios.
        # NOTE(review): construction order and placement (before the skip
        # check) are kept exactly as in the original; if any constructor
        # draws from a seeded RNG, reordering would change results --
        # confirm before restructuring.

        # baseline methods
        poly2sls_method = Poly2SLS(poly_degree=[1],
                                   ridge_alpha=np.logspace(-5, 3, 5))
        direct_method = None
        gmm_method = None
        deep_iv = all_baselines.DeepIV()
        if scenario_name == "mnist_z":
            deep_iv = all_baselines.DeepIV(treatment_model="cnn")
            gmm_method = GMM(g_model="2-layer", n_steps=10, g_epochs=10)
            direct_method = DirectNN()
        elif scenario_name == "mnist_x":
            gmm_method = GMM(g_model="mnist", n_steps=10, g_epochs=1)
            direct_method = DirectMNIST()
        elif scenario_name == "mnist_xz":
            deep_iv = all_baselines.DeepIV(treatment_model="cnn")
            gmm_method = GMM(g_model="mnist", n_steps=10, g_epochs=1)
            direct_method = DirectMNIST()

        methods = [
            ("DirectNN", direct_method),
            ("Vanilla2SLS", Vanilla2SLS()),
            ("Ridge2SLS", poly2sls_method),
            ("GMM", gmm_method),
            ("DeepIV", deep_iv),
        ]

        if training:
            # Only the replicate requested by the caller is trained.
            if rep < repid:
                continue
            if rep > repid:
                break

            # The slice yields at most one (name, method) pair.
            for method_name, method in methods[mid:mid + 1]:
                print("Running " + method_name)
                model, fit_time = method.fit(train.x, train.y, train.z, None)
                os.makedirs(folder, exist_ok=True)
                np.save(folder + '{}_{}_time.npy'.format(method_name, rep),
                        fit_time)
                file_name = "%s_%d.npz" % (method_name, rep)
                save_path = os.path.join(folder, file_name)
                save_model(model, save_path, test)
                test_mse = eval_model(model, test)
                model_type_name = type(model).__name__
                print("Test MSE of %s: %f" % (model_type_name, test_mse))
        else:
            mean_rep = []
            for method_name, method in methods:
                file_name = "%s_%d.npz" % (method_name, rep)
                save_path = os.path.join(folder, file_name)
                # NpzFile keeps the archive open until closed; the context
                # manager releases the file handle deterministically.
                with np.load(save_path) as res:
                    mean_rep += [((res['g_true'] - res['g_hat'])**2).mean()]
            print(mean_rep)
            means += [mean_rep]
    return means
def run_experiment(scenario_name, mid, repid, datasize, num_reps=10, seed=527, training=False):
    """Train one baseline on a zoo scenario, or collect saved MSEs and times.

    Torch, numpy and tensorflow are seeded with ``seed`` and the data is
    loaded from ``ROOT_PATH/data/zoo/<scenario_name>_<datasize>.npz``. For
    every replicate index the list of ``(name, method)`` baselines is
    rebuilt in the fixed order DirectNN, Vanilla2SLS, Poly2SLS, GMM, AGMM,
    DeepIV.

    * ``training=True``: only replicate ``repid`` is processed, and within
      it only the method at position ``mid``. The fitted model and the
      second value returned by ``fit`` are saved under
      ``ROOT_PATH/results/zoo/<scenario_name>/``.
    * ``training=False``: for every replicate, stored results and timing
      files are read for all methods. A replicate contributes to ``means``
      (respectively ``times``) only when a value was found for every
      method; missing files are reported on stdout.

    Args:
        scenario_name: scenario part of the data file name and result folder.
        mid: index into the method list (training mode only).
        repid: replicate index to train (training mode only).
        datasize: value formatted into the data file name and, in collect
            mode, into the result file name.
        num_reps: number of replicate indices to iterate over.
        seed: seed passed to torch, numpy and tensorflow.
        training: selects between the fit path and the collect path.

    Returns:
        tuple[list, list]: ``(means, times)``; each element is one list per
        complete replicate, holding one entry per method. Both are empty
        when ``training`` is true.
    """
    # set random seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    tensorflow.set_random_seed(seed)

    train, dev, test = load_data(ROOT_PATH + '/data/zoo/' + scenario_name + '_{}.npz'.format(datasize))

    # result folder
    folder = ROOT_PATH + "/results/zoo/" + scenario_name + "/"
    os.makedirs(folder, exist_ok=True)

    means = []
    times = []
    for rep in range(num_reps):
        # Not all methods are applicable in all scenarios.
        # NOTE(review): the methods are still built before the skip check,
        # as in the original; if any constructor draws from a seeded RNG,
        # moving them would change results -- confirm before hoisting.

        # baseline methods
        methods = [
            ("DirectNN", DirectNN()),
            ("Vanilla2SLS", Vanilla2SLS()),
            ("Poly2SLS", Poly2SLS()),
            ("GMM", GMM(g_model="2-layer", n_steps=20)),
            ("AGMM", AGMM()),
            ("DeepIV", DeepIV()),
        ]

        if training:
            # Only the replicate requested by the caller is trained.
            if rep < repid:
                continue
            if rep > repid:
                break

            # The slice yields at most one (name, method) pair.
            for method_name, method in methods[mid:mid + 1]:
                print("Running " + method_name + " " + str(rep))
                file_name = "%s_%d_%d.npz" % (method_name, rep, train.x.shape[0])
                save_path = os.path.join(folder, file_name)
                model, fit_time = method.fit(train.x, train.y, train.z, None)
                np.save(
                    folder + "%s_%d_%d_time.npy" % (method_name, rep, train.x.shape[0]),
                    fit_time)
                save_model(model, save_path, test)
                test_mse = eval_model(model, test)
                model_type_name = type(model).__name__
                print("Test MSE of %s: %f" % (model_type_name, test_mse))
        else:
            means2 = []
            times2 = []
            for method_name, method in methods:
                # NOTE(review): results are written with train.x.shape[0]
                # in the file name but looked up here with ``datasize``;
                # the two must be equal for the files to be found --
                # confirm against load_data.
                file_name = "%s_%d_%d.npz" % (method_name, rep, datasize)
                save_path = os.path.join(folder, file_name)
                if os.path.exists(save_path):
                    # NpzFile keeps the archive open until closed; the
                    # context manager releases the handle deterministically.
                    with np.load(save_path) as res:
                        mse = float(((res['g_hat'] - res['g_true'])**2).mean())
                    means2 += [mse]
                else:
                    print(save_path, ' not exists')

                time_path = folder + "%s_%d_%d_time.npy" % (method_name, rep, train.x.shape[0])
                if os.path.exists(time_path):
                    elapsed = np.load(time_path)
                    times2 += [elapsed]
                else:
                    print(time_path, ' not exists')

            # Keep a replicate only when every method contributed a value.
            if len(means2) == len(methods):
                means += [means2]
            if len(times2) == len(methods):
                times += [times2]
    return means, times