def plot(self, output_dir):
    """Draw the current optimisation state and save it as a PNG.

    A 1-D, 2-D or 3-D plotting routine is chosen from ``self.ndim``.  The
    resulting figure is written to ``<output_dir>/res_NNNN.png`` where
    ``NNNN`` is ``self.point_info_manager.update_cnt`` formatted with
    ``%04d``.  For any other dimensionality a message is printed and
    nothing is saved.

    Args:
        output_dir: Directory receiving the image.  It is created through
            ``mkdir_if_not_exist`` right before saving.
    """

    def plot3d():
        # Three input dimensions: only point locations are drawn.
        fig = plt.figure()
        # add_subplot registers the axes with the figure; a bare
        # Axes3D(fig) is not attached automatically on recent Matplotlib.
        ax = fig.add_subplot(111, projection='3d')
        X_seq = self.point_info_manager.X_seq

        if self.gt_available:
            c_true, _, _ = normalization.zero_one_normalization(self.z)
            c_true = cm.bwr(c_true * 255)
            grid = self.X_grid.astype(float)
            ax.scatter([x[0] for x in grid],
                       [x[1] for x in grid],
                       [x[2] for x in grid],
                       c=c_true, marker='o', alpha=0.5, s=5)

        # Sampled points are drawn in one fixed colour.
        ax.scatter([x[0] for x in X_seq],
                   [x[1] for x in X_seq],
                   [x[2] for x in X_seq],
                   c='y', marker='o', alpha=0.5)

        # Highlight the latest point, plus the one before it when sampling
        # in pairs.
        ax.scatter(X_seq[-1][0], X_seq[-1][1], X_seq[-1][2],
                   c='m', s=50, marker='o', alpha=1.0)
        if self.does_pairwise_sampling:
            ax.scatter(X_seq[-2][0], X_seq[-2][1], X_seq[-2][2],
                       c='m', s=100, marker='o', alpha=1.0)

    def plot2d():
        acq_score = self.acquisition_func.compute(
            self.mu, self.sigma, self.point_info_manager.get_T())
        mu = self.mu.flatten()
        X, T = self.point_info_manager.get_observed_XT_pair(gets_real=True)
        X_seq = self.point_info_manager.X_seq
        T_seq = self.point_info_manager.T_seq

        if self.normalize_output:
            mu = self.point_info_manager.get_unnormalized_value_list(mu)
            acq_score = self.point_info_manager.get_unnormalized_value_list(acq_score)

        mesh_x = self.meshgrid[0].astype(float)
        mesh_y = self.meshgrid[1].astype(float)
        mesh_shape = self.meshgrid[0].shape

        if self.acquisition_func_name == 'ucb':
            # For 'ucb' the mean and the acquisition score share one axes.
            fig = plt.figure()
            mean_ax = fig.add_subplot(111, projection='3d')
            acq_ax = mean_ax
        else:
            # Otherwise: mean on top, acquisition score below.
            fig = plt.figure(figsize=(6, 10))
            fig.subplots_adjust(right=0.8)
            mean_ax = fig.add_subplot(2, 1, 1, projection='3d')
            acq_ax = fig.add_subplot(2, 1, 2, projection='3d')

        mean_ax.plot_wireframe(mesh_x, mesh_y, mu.reshape(mesh_shape),
                               alpha=0.5, color='g')
        acq_ax.plot_wireframe(mesh_x, mesh_y, acq_score.reshape(mesh_shape),
                              alpha=0.5, color='y')

        if self.gt_available:
            mean_ax.plot_wireframe(mesh_x, mesh_y, self.z, alpha=0.3, color='b')

        mean_ax.scatter([x[0] for x in X], [x[1] for x in X], T,
                        c='r', marker='o', alpha=0.5)

        if self.does_pairwise_sampling:
            mean_ax.scatter(X_seq[-2][0], X_seq[-2][1], T_seq[-2],
                            c='m', s=100, marker='o', alpha=1.0)
        mean_ax.scatter(X_seq[-1][0], X_seq[-1][1], T_seq[-1],
                        c='m', s=50, marker='o', alpha=1.0)

    def plot1d():
        acq_score = self.acquisition_func.compute(
            self.mu, self.sigma, self.point_info_manager.get_T())
        mu = self.mu.flatten()

        if self.normalize_output:
            mu = self.point_info_manager.get_unnormalized_value_list(mu)
            acq_score = self.point_info_manager.get_unnormalized_value_list(acq_score)

        X, T = self.point_info_manager.get_observed_XT_pair(gets_real=True)
        X_seq = self.point_info_manager.X_seq
        T_seq = self.point_info_manager.T_seq
        grid = self.meshgrid[0].astype(float)

        if self.acquisition_func_name == 'ucb':
            # For 'ucb' everything is drawn on the current pyplot axes.
            plt.plot(grid, mu, color='g')
            plt.plot(grid, acq_score, color='y')
            # Ground truth is only drawn when available, as in every other
            # branch of this method.
            if self.gt_available:
                plt.plot(self.meshgrid[0], self.z, alpha=0.3, color='b')
            plt.scatter(X, T, c='r', s=10, marker='o', alpha=1.0)

            if self.does_pairwise_sampling:
                plt.scatter(X_seq[-2], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)
            plt.scatter(X_seq[-1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)
        else:
            # Mean on a tall upper panel, acquisition score on a short
            # lower panel (height ratio 3:1).
            fig = plt.figure()
            fig.subplots_adjust(left=0.15)
            gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
            upper = plt.subplot(gs[0])
            lower = plt.subplot(gs[1])

            upper.plot(grid, mu, color='g')
            lower.plot(grid, acq_score, color='y')

            if self.gt_available:
                upper.plot(self.meshgrid[0], self.z, alpha=0.3, color='b')

            upper.scatter(X, T, c='r', s=10, marker='o', alpha=1.0)

            if self.does_pairwise_sampling:
                upper.scatter(X_seq[-2], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)
            upper.scatter(X_seq[-1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)

            upper.set_ylabel('f(x)', fontdict={'size': 18})
            lower.set_ylabel(self.acquisition_func_name.upper(), fontdict={'size': 18})

    # Explicit dispatch table instead of exec() on a formatted string.
    plotters = {1: plot1d, 2: plot2d, 3: plot3d}
    plotter = plotters.get(self.ndim)
    if plotter is None:
        print("Sorry... Plotting only supports 1, 2, or 3 dim.")
        return

    plotter()

    out_fn = os.path.join(output_dir, 'res_%04d.png' % self.point_info_manager.update_cnt)
    mkdir_if_not_exist(output_dir)
    plt.savefig(out_fn, transparent=True, bbox_inches='tight', pad_inches=0)
    plt.close()
import os
import sys

import numpy as np
import pandas as pd

from gphypo.util import mkdir_if_not_exist


def _power_of_ten(x):
    """Map a grid value x to 10 ** x."""
    return 10**x


def _identity(x):
    """Return the grid value unchanged."""
    return x


# Grid of candidate values for each parameter; one CSV is written per key.
gp_param_dic = {
    "alpha": np.arange(-2, 2.01, 0.2),
    "beta": np.arange(-2, 2.01, 0.2),
    "n_cluster": np.arange(5, 20.1).astype(int)
}

# Per-parameter conversion applied to each grid value to fill the
# 'bo_<name>' column.
gp_param2lda_param = {
    "alpha": _power_of_ten,
    "beta": _power_of_ten,
    "n_cluster": _identity
}

output_dir = 'csv_files'
mkdir_if_not_exist(output_dir)

# Write <output_dir>/<name>.csv with two columns: the raw grid (<name>) and
# its converted counterpart (bo_<name>).
for name, grid in gp_param_dic.items():
    table = pd.DataFrame({name: grid})
    table['bo_' + name] = table[name].apply(gp_param2lda_param[name])

    csv_path = os.path.join(output_dir, name) + ".csv"
    table.to_csv(csv_path, index=False)
    print(csv_path + " was created")
def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one click-based 2-D Gaussian trial and collect its CSV results.

    Any existing output directory for this acquisition function is deleted
    first.  Parameter grids are read from the CSV files found in
    ``parameter_dir``.  A ``GMRF_BO`` agent is then run against a
    ``ClickTwoDimGaussianEnvironment`` for at most 100 calls of
    ``learn_from_clicks``.  Finally every ``*.csv`` in the output directory
    is moved into ``./eval``.

    Pressing Ctrl-C during the learning loop prints a message and exits the
    process with status 0; in that case no files are moved.

    Args:
        ACQUISITION_FUNC: Acquisition function name.  It is interpolated
            into directory and file names and passed to ``GMRF_BO``.
        trialCount: Trial index, formatted with ``%d`` into the result
            file name.
    """
    print("%s: trial %d" % (ACQUISITION_FUNC, trialCount))

    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s_clicks' % ACQUISITION_FUNC)
    # Start every trial from an empty output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    RESULT_FILENAME = os.path.join(
        OUTPUT_DIR,
        "gaussian_result_2dim_clicks_%s_trialCount_%d.csv" % (ACQUISITION_FUNC, trialCount))

    MU2RATIO_DIR = './mu2ratio_%s' % ACQUISITION_FUNC
    mu_sigma_csv_path = './mu2ratio_%s/mu_sigma.csv' % ACQUISITION_FUNC
    ratio_csv_out_path = './mu2ratio_%s/ratios.csv' % ACQUISITION_FUNC
    EVAL_DIR = "./eval"

    N_TOTAL_EXP = 100000

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)
    mkdir_if_not_exist(MU2RATIO_DIR)
    mkdir_if_not_exist(EVAL_DIR)

    # Each '<name>.csv' in parameter_dir describes one parameter.
    param_names = sorted([x.replace('.csv', '') for x in os.listdir(parameter_dir)])

    bo_param2model_param_dic = {}
    bo_param_list = []

    for param_name in param_names:
        # dtype=str keeps the values (and hence the dict keys below) as
        # strings.
        param_df = pd.read_csv(os.path.join(parameter_dir, param_name + '.csv'), dtype=str)
        # Candidate values: the column named after the file.
        bo_param_list.append(param_df[param_name].values)

        # Map each candidate value to the cell of the 'bo_<name>' column.
        param_df.set_index(param_name, inplace=True)
        bo_param2model_param_dic[param_name] = param_df.to_dict()['bo_' + param_name]

    env = ClickTwoDimGaussianEnvironment(bo_param2model_param_dic=bo_param2model_param_dic,
                                         result_filename=RESULT_FILENAME,
                                         output_dir=OUTPUT_DIR,
                                         reload=reload)

    agent = GMRF_BO(bo_param_list, env, GAMMA=GAMMA, GAMMA0=GAMMA0, GAMMA_Y=GAMMA_Y, ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED, gt_available=True,
                    n_early_stopping=N_EARLY_STOPPING, burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT,
                    update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K, initial_theta=INITIAL_THETA,
                    acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC,
                    n_ctr=N_TOTAL_EXP)

    agent.save_mu_sigma_csv(outfn=mu_sigma_csv_path)

    nIter = 100
    for i in range(nIter):
        try:
            flg = agent.learn_from_clicks(mu2ratio_dir=MU2RATIO_DIR,
                                          mu_sigma_csv_path=mu_sigma_csv_path,
                                          ratio_csv_out_path=ratio_csv_out_path)

            # Explicit comparison kept on purpose: only a False return stops
            # the loop, a None return does not.
            if flg == False:
                print("Early Stopping!!!")
                print(agent.bestX)
                print(agent.bestT)
                break

        except KeyboardInterrupt:
            print("Learnig process was forced to stop!")
            # Same effect as exit(0) without relying on the site-provided
            # builtin.
            raise SystemExit(0)

    # Move the result CSVs without a shell so that directory names with
    # spaces or shell metacharacters are handled correctly.  Dot-files are
    # skipped, as a shell '*.csv' glob would skip them.
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            if csv_name.endswith('.csv') and not csv_name.startswith('.'):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))
def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one 4-D Gaussian optimisation trial and collect its CSV results.

    Any existing output directory for this acquisition function is deleted
    first.  Parameter grids are read from the CSV files in
    ``PARAMETER_DIR``.  A ``GMRF_BO`` agent is then run against a
    ``FourDimGaussianEnvironment`` for at most 500 calls of ``learn``.
    Finally every ``*.csv`` in the output directory is moved into
    ``./eval``.

    Args:
        ACQUISITION_FUNC: Acquisition function name.  It is interpolated
            into directory and file names and passed to ``GMRF_BO``.
        trialCount: Trial index, formatted with ``%d`` into the result
            file name.
    """
    print("%s: trial %d" % (ACQUISITION_FUNC, trialCount))

    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s' % ACQUISITION_FUNC)
    EVAL_DIR = "./eval"

    # Temporary measure from the original script: start every trial from an
    # empty output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    RESULT_FILENAME = os.path.join(
        OUTPUT_DIR,
        "gaussian_result_4dim_%s_trialCount_%d.csv" % (ACQUISITION_FUNC, trialCount))

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)

    # Each '<name>.csv' in PARAMETER_DIR describes one parameter.
    param_names = sorted(
        [x.replace('.csv', '') for x in os.listdir(PARAMETER_DIR)])

    bo_param2model_param_dic = {}
    bo_param_list = []

    for param_name in param_names:
        # dtype=str keeps the values (and hence the dict keys below) as
        # strings.
        param_df = pd.read_csv(os.path.join(PARAMETER_DIR, param_name + '.csv'), dtype=str)
        # Candidate values: the column named after the file.
        bo_param_list.append(param_df[param_name].values)

        # Map each candidate value to the cell of the 'gp_<name>' column.
        param_df.set_index(param_name, inplace=True)
        bo_param2model_param_dic[param_name] = param_df.to_dict()['gp_' + param_name]

    env = FourDimGaussianEnvironment(
        bo_param2model_param_dic=bo_param2model_param_dic,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=False)

    agent = GMRF_BO(bo_param_list, env, GAMMA=GAMMA, GAMMA0=GAMMA0, GAMMA_Y=GAMMA_Y, ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED, gt_available=True,
                    n_early_stopping=N_EARLY_STOPPING, burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT,
                    update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K, initial_theta=INITIAL_THETA,
                    acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC)

    nIter = 500
    for i in range(nIter):
        # drop is True on every iteration except the last scheduled one.
        flg = agent.learn(drop=i < nIter - 1)
        if not flg:
            print("Early Stopping!!!")
            print(agent.bestX)
            print(agent.bestT)
            break

    # Move the result CSVs without a shell so that directory names with
    # spaces or shell metacharacters are handled correctly.  The target
    # directory is created first so the move cannot fail on a missing
    # './eval'.  Dot-files are skipped, as a shell '*.csv' glob would.
    mkdir_if_not_exist(EVAL_DIR)
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            if csv_name.endswith('.csv') and not csv_name.startswith('.'):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))
# NOTE(review): the statement below appears to be the tail of a definition
# that begins before this chunk; it is kept verbatim.
os.system("mv %s/*.csv ./eval/" % OUTPUT_DIR)


def testForTrials(acFunc, nIter, startTrial=21):
    """Run ``singleTest`` once per trial index in ``[startTrial, nIter)``.

    Args:
        acFunc: Acquisition function name forwarded to ``singleTest``.
        nIter: Exclusive upper bound of the trial index.
        startTrial: First trial index to run.  Defaults to 21, the value
            that used to be hard-coded here.
    """
    for trialCount in range(startTrial, nIter):
        singleTest(acFunc, trialCount)


if __name__ == '__main__':
    # The acquisition function name is the single required argument; the
    # commented-out driver previously listed "ucb", "pi", "ei", "greedy"
    # and "ts" as candidates.
    if len(sys.argv) < 2:
        sys.exit("usage: %s ACQUISITION_FUNC" % sys.argv[0])

    mkdir_if_not_exist(os.path.join(os.getcwd(), "eval"))

    acFuncs = sys.argv[1]
    nTrials = 30

    # Trials run sequentially in this process.
    testForTrials(acFuncs, nTrials)
def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one 1-D Gaussian optimisation trial and collect its CSV results.

    Any existing output directory for this acquisition function is deleted
    first.  NumPy's global random generator is reseeded, and parameter
    grids are read from the CSV files in ``PARAMETER_DIR``.  A ``GMRF_BO``
    agent is then run against a ``OneDimGaussianEnvironment`` for at most
    1000 calls of ``learn``.  Finally every ``*.csv`` in the output
    directory is moved into ``./eval``.

    Args:
        ACQUISITION_FUNC: Acquisition function name.  It is interpolated
            into directory and file names and passed to ``GMRF_BO``.
        trialCount: Trial index, formatted with ``%d`` into the result
            file name.
    """
    print("%s: trial %d" % (ACQUISITION_FUNC, trialCount))

    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s' % ACQUISITION_FUNC)
    EVAL_DIR = "./eval"

    # Temporary measure from the original script: start every trial from an
    # empty output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    RESULT_FILENAME = os.path.join(
        OUTPUT_DIR,
        "gaussian_result_1dim_%s_trialCount_%d.csv" % (ACQUISITION_FUNC, trialCount))

    # Reseed from OS entropy.  A seed derived from the wall clock in whole
    # seconds is identical for processes that start within the same second.
    np.random.seed()

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)

    # Each '<name>.csv' in PARAMETER_DIR describes one parameter.
    param_names = sorted(
        [x.replace('.csv', '') for x in os.listdir(PARAMETER_DIR)])

    bo_param2model_param_dic = {}
    bo_param_list = []

    for param_name in param_names:
        # dtype=str makes the index column str instead of float.
        param_df = pd.read_csv(
            os.path.join(PARAMETER_DIR, param_name + '.csv'), dtype=str)

        # Candidate values: the column with the same name as the file,
        # e.g. "x" for x.csv.
        bo_param_list.append(param_df[param_name].values)

        # That column becomes the index, so the dict stored below maps
        # index value (str) -> cell of the 'bo_<name>' column.
        param_df.set_index(param_name, inplace=True)
        bo_param2model_param_dic[param_name] = param_df.to_dict()['bo_' + param_name]

    env = OneDimGaussianEnvironment(
        bo_param2model_param_dic=bo_param2model_param_dic,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=RELOAD)

    agent = GMRF_BO(bo_param_list, env, GAMMA=GAMMA, GAMMA0=GAMMA0, GAMMA_Y=GAMMA_Y, ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED, gt_available=True,
                    n_early_stopping=N_EARLY_STOPPING, burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT,
                    update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K, initial_theta=INITIAL_THETA,
                    acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC)

    nIter = 1000
    for i in range(nIter):
        flg = agent.learn()
        # agent.plot(...) and agent.save_mu_sigma_csv() are deliberately
        # not called here: the original author noted that both cause
        # deadlock among processes (I/O contention).

        # Explicit comparison kept on purpose: only a False return stops
        # the loop, a None return does not.
        if flg == False:
            print("Early Stopping!!!")
            print("bestX =", agent.bestX)
            print("bestT =", agent.bestT)
            break

    # Move the result CSVs without a shell so that directory names with
    # spaces or shell metacharacters are handled correctly.  The target
    # directory is created first so the move cannot fail on a missing
    # './eval'.  Dot-files are skipped, as a shell '*.csv' glob would.
    mkdir_if_not_exist(EVAL_DIR)
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            if csv_name.endswith('.csv') and not csv_name.startswith('.'):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))
def test(ACQUISITION_FUNC):
    """Run a single GP_BO experiment on the 1-D Gaussian environment.

    The output directory is wiped and recreated, and parameter grids are
    loaded from the CSV files in ``PARAMETER_DIR``.  A ``GP_BO`` agent then
    learns for at most 200 iterations, plotting and saving mu/sigma after
    each one.  Afterwards the reward sequence is plotted, the PNG-to-GIF
    shell script is invoked, and the GIFs under ``./output`` are moved to
    the current directory.

    Args:
        ACQUISITION_FUNC: Acquisition function name passed to ``GP_BO`` and
            embedded in the GIF file name.
    """
    # Temporary measure: always start from a clean output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)

    # One parameter per '<stem>.csv' file in PARAMETER_DIR.
    csv_stems = sorted(
        fn.replace('.csv', '') for fn in os.listdir(PARAMETER_DIR))

    value_grids = []
    value_lookup = {}
    for stem in csv_stems:
        # Read everything as str so the index column is str, not float.
        frame = pd.read_csv(
            os.path.join(PARAMETER_DIR, stem + '.csv'), dtype=str)

        # The column named like the file holds the candidate values.
        value_grids.append(frame[stem].values)

        # Index by that column; keep index value -> 'bo_<stem>' cell.
        by_value = frame.set_index(stem)
        value_lookup[stem] = by_value.to_dict()['bo_' + stem]

    # SinEnvironment was a commented-out alternative environment here.
    env = OneDimGaussianEnvironment(
        bo_param2model_param_dic=value_lookup,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=RELOAD)

    # GMRF_BO was a commented-out alternative agent here.
    agent = GP_BO(value_grids, env,
                  gt_available=True,
                  my_kernel=kernel,
                  burnin=BURNIN,
                  normalize_output=NORMALIZE_OUTPUT,
                  acquisition_func=ACQUISITION_FUNC,
                  acquisition_param_dic=ACQUISITION_PARAM_DIC)

    nIter = 200
    last_step = nIter - 1
    for step in range(nIter):
        # drop is True on every iteration except the last scheduled one.
        keep_dropping = step < last_step
        flg = agent.learn(drop=keep_dropping)

        agent.plot(output_dir=OUTPUT_DIR)
        agent.save_mu_sigma_csv()

        if flg == False:
            print("Early Stopping!!!")
            print("bestX =", agent.bestX)
            print("bestT =", agent.bestT)
            break

    plot_1dim(agent.point_info_manager.T_seq, 'reward.png')

    # Assemble the gif-conversion command line; it is run through the shell.
    gif_command = "./convert_pngs2gif.sh demo_%s_iter_%d_eps_%f.gif" % (
        ACQUISITION_FUNC, nIter, ACQUISITION_PARAM_DIC["eps"])
    subprocess.call([gif_command], shell=True)

    os.system("mv ./output/*.gif ./")