def evaluateDataset(csv_path, target_index, problem, model, parameter_dict,
                    method='holdout', seed=20, max_iter=50):
    print('Now evaluating {}...'.format(csv_path))
    X, y = build(csv_path, target_index)
    wrapper = loss(model, X, y, method=method, problem=problem)

    print('Evaluating EI')
    np.random.seed(seed)
    sexp = squaredExponential()
    gp = GaussianProcess(sexp, optimize=True, usegrads=True)
    acq_ei = Acquisition(mode='ExpectedImprovement')
    gpgo_ei = GPGO(gp, acq_ei, wrapper.evaluateLoss, parameter_dict, n_jobs=1)
    gpgo_ei.run(max_iter=max_iter)

    # Also evaluate UCB with beta = 0.5 and beta = 1.5
    print('Evaluating UCB beta = 0.5')
    np.random.seed(seed)
    sexp = squaredExponential()
    gp = GaussianProcess(sexp, optimize=True, usegrads=True)
    acq_ucb = Acquisition(mode='UCB', beta=0.5)
    gpgo_ucb = GPGO(gp, acq_ucb, wrapper.evaluateLoss, parameter_dict, n_jobs=1)
    gpgo_ucb.run(max_iter=max_iter)

    print('Evaluating UCB beta = 1.5')
    np.random.seed(seed)
    sexp = squaredExponential()
    gp = GaussianProcess(sexp, optimize=True, usegrads=True)
    acq_ucb2 = Acquisition(mode='UCB', beta=1.5)
    gpgo_ucb2 = GPGO(gp, acq_ucb2, wrapper.evaluateLoss, parameter_dict, n_jobs=1)
    gpgo_ucb2.run(max_iter=max_iter)

    print('Evaluating random')
    np.random.seed(seed)
    r = evaluateRandom(gpgo_ei, wrapper.evaluateLoss, n_eval=max_iter + 1)
    r = cumMax(r)
    return (np.array(gpgo_ei.history), np.array(gpgo_ucb.history),
            np.array(gpgo_ucb2.history), r)
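# A hypothetical driver for evaluateDataset, assuming the surrounding
# benchmark script supplies build(), loss(), evaluateRandom() and cumMax();
# the CSV path, target column, and SVC search space below are placeholders:
if __name__ == '__main__':
    from sklearn.svm import SVC
    params = {'C': ('cont', (-4, 5)), 'gamma': ('cont', (-4, 5))}
    ei, ucb_05, ucb_15, rand = evaluateDataset(
        'datasets/example.csv', target_index=0, problem='binary',
        model=SVC, parameter_dict=params, max_iter=50)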
def f_bo(single_iter_bo=100):
    sexp = matern52()
    gp = GaussianProcess(sexp)
    # Available acquisition modes (from pyGPGO's Acquisition class):
    #   'ExpectedImprovement', 'IntegratedExpectedImprovement',
    #   'ProbabilityImprovement', 'IntegratedProbabilityImprovement',
    #   'UCB', 'IntegratedUCB', 'Entropy',
    #   'tExpectedImprovement', 'tIntegratedExpectedImprovement'
    acq = Acquisition(mode='ExpectedImprovement')
    param = OrderedDict()
    for temp in X_name:
        param[temp] = ('cont', x_round[0])
    gpgo = GPGO(gp, acq, f, param)
    gpgo.run(max_iter=single_iter_bo, nstart=100)
    res, f_min_xy = gpgo.getResult()
    f_list = []
    f_list.extend(gpgo.return_max_f())
    print('f_list:', f_list)
    return f_list
def test_GPGO_sk():
    np.random.seed(20)
    rf = RandomForest()
    acq = Acquisition(mode='ExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(rf, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .7 < res['x'] < .8
def test_GPGO_mcmc():
    np.random.seed(20)
    sexp = squaredExponential()
    gp = GaussianProcessMCMC(sexp, step=pm.Slice, niter=100)
    acq = Acquisition(mode='IntegratedExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .7 < res['x'] < .8
def test_GPGO():
    np.random.seed(20)
    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .6 < res['x'] < .8
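# The three tests above reference a module-level objective `f` that is not
# part of this excerpt. A sketch consistent with the asserted optimum
# (x close to 0.75 on [0, 1]) is the negated Forrester function -- an
# assumption, not necessarily the original fixture:
def f(x):
    # Forrester et al. (2008) benchmark, negated because GPGO maximizes
    return -((6 * x - 2) ** 2 * np.sin(12 * x - 4))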
def part_2(max_iter):
    acq_1 = Acquisition(mode='ExpectedImprovement')
    acq_2 = Acquisition(mode='ProbabilityImprovement')
    acq_3 = Acquisition(mode='UCB', beta=0.5)
    acq_4 = Acquisition(mode='UCB', beta=1.5)
    acq_list = [acq_1, acq_2, acq_3, acq_4]
    sqexp = squaredExponential()
    param = OrderedDict()
    param['x'] = ('cont', [-2, 2])
    param['y'] = ('cont', [-2, 2])
    new = True
    colors = ['green', 'red', 'orange', 'black']
    acq_titles = [
        'Expected improvement', 'Probability of Improvement',
        'GP-UCB, beta = .5', 'GP-UCB, beta = 1.5'
    ]
    plt.suptitle('Acquisition Functions with Convergence Rates')
    idx = 0
    for index, acq in enumerate(acq_list):
        np.random.seed(200)
        gp = GaussianProcess(sqexp)
        gpgo = GPGO(gp, acq, Part_1a.f, param)
        gpgo.run(max_iter=max_iter)
        plt.subplot(4, 2, idx + 1)
        plot_acquisition(gpgo, param, index + 2, colors, acq_titles[index],
                         new=new)
        plt.subplot(4, 2, idx + 2)
        plot_convergence(gpgo, acq_titles[index])
        new = False
        idx = idx + 2
    plt.show()
def part_1(max_iter):
    # Plot the function
    param = OrderedDict()
    param['x'] = ('cont', [-2, 2])
    param['y'] = ('cont', [-2, 2])

    plt.suptitle("Convergence Rate, True Optimum = 0")

    # Squared exponential kernel
    np.random.seed(20)
    plt.subplot(131)
    sqexp = squaredExponential()
    gp = GaussianProcess(sqexp)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, Part_1a.f, param, n_jobs=-1)
    gpgo.run(max_iter=max_iter)
    plot_convergence(gpgo, "Squared Exponential Kernel")

    # Matern 5/2 kernel
    np.random.seed(20)
    plt.subplot(132)
    matern = matern52()
    gp = GaussianProcess(matern)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, Part_1a.f, param, n_jobs=-1)
    gpgo.run(max_iter=max_iter)
    plot_convergence(gpgo, "Matern52 Kernel")

    # Rational quadratic kernel
    np.random.seed(20)
    plt.subplot(133)
    ratq = rationalQuadratic()
    gp = GaussianProcess(ratq)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, Part_1a.f, param, n_jobs=-1)
    gpgo.run(max_iter=max_iter)
    plot_convergence(gpgo, "Rational Quadratic Kernel")

    plt.show()
def main():
    def f(x):
        return np.sin(x)

    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {
        'n_hidden_2': ('int', [80, 120]),
        # 'aOp': ...  (second entry truncated in the source)
    }
    np.random.seed(23)
    gpgo = GPGO(gp, acq, Main_Loop, param)
    gpgo.run(max_iter=20)
    res = gpgo.getResult()[0]
    print(res)
def execute_pygpgo(params, problem, max_eval, log):
    from solvers.pyGPGO.wpyGPGO import optimize_pyGPGO
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess

    rand_evals = int(params['--rand-evals'])
    # TODO: Allow picking different values for these?
    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ExpectedImprovement')
    return optimize_pyGPGO(problem, max_eval, gp, acq,
                           random_init_evals=rand_evals, log=log)
def func_first_order(type_f=None, xishu_f=None, point_f=None):
    # Decide whether this independent variable's component is linear or nonlinear
    if type_f == 'linear':
        print('Running first-order linear optimization')
        # Function value at the left endpoint
        f_left = rbf_hdmr.func_1D_value(x_round[0][0], type=type_f,
                                        xishu=xishu_f, point_sample=point_f)
        # Function value at the right endpoint
        f_right = rbf_hdmr.func_1D_value(x_round[0][1], type=type_f,
                                         xishu=xishu_f, point_sample=point_f)
        if f_left > f_right:
            f_min_i = f_right
            x_min = x_round[0][1]
        else:
            f_min_i = f_left
            x_min = x_round[0][0]
    # Nonlinear case for the independent variable
    else:
        print('Running first-order nonlinear optimization')

        # For a nonlinear 1-D component, the best approach is Bayesian
        # optimization; the objective is negated because GPGO maximizes.
        def f(x):
            return -(rbf_hdmr.func_1D_value(x, type=type_f, xishu=xishu_f,
                                            point_sample=point_f))

        sexp = matern52()
        gp = GaussianProcess(sexp)
        acq = Acquisition(mode='ExpectedImprovement')
        round_x = (x_round[0][0], x_round[0][1])
        param = {'x': ('cont', round_x)}
        gpgo = GPGO(gp, acq, f, param)
        gpgo.run(max_iter=20, nstart=10)
        res, f_min_i = gpgo.getResult()
        print('res:', res)
        x_min = res[0]
    return x_min, f_min_i
def main2():
    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {
        'r1': ('cont', (0, 1)),
        'r2': ('cont', (0, 1)),
        'r3': ('cont', (0, 1)),
        'r4': ('cont', (0, 1)),
        'r5': ('cont', (0, 1)),
        'r6': ('cont', (0, 1)),
        'r7': ('cont', (0, 1)),
        'r8': ('cont', (0, 1))
    }
    gpgo = GPGO(gp, acq, Main_Loop, param)
    gpgo.run(max_iter=200)
    res = gpgo.getResult()[0]
    print(res)
def func_model(index_ij=None, x_min=None, func_min=None, max_iter_i=10,
               nstart_i=10):
    '''
    :param index_ij: the selected component functions
    :param x_min: the current best solution (updated in place)
    :param func_min: the current best function value
    :return: the locally iterated optimum
    '''
    first_order = []
    second_order = []
    for k in index_ij:
        if k < len(type_fx):  # first-order term
            first_order.append(k)
        else:
            # variables of a second-order term
            second_order.append(x_ij_index[k - len(type_fx)])
    x_inter = []
    if len(second_order) != 0:
        for index in range(len(second_order)):
            x_inter.append(list(second_order[index]))
        x_inter = np.unique(x_inter)
    print('first_order:', first_order)
    print('second_order:', second_order)
    print('x_inter:', x_inter)

    # Define the optimization dimensions
    temp_first = first_order.copy()
    temp_first.extend(x_inter)
    index_dimen = np.unique(temp_first)

    # Array holding the first-order variables that must be optimized together
    # with second-order functions. If every first-order function is
    # independent of the second-order ones, this array stays empty, and the
    # code below computes the minima directly, writing them into the
    # corresponding variable values and into the function minimum.
    denpend_point_1D = []
    if len(first_order) != 0:
        for i in first_order:
            # Independent case
            if i not in x_inter:
                type_fx_i = type_fx[i]
                xishu_arr_i = xishu_arr[i]
                point_round_i = point_round[i]
                print('Optimizing an independent first-order variable, '
                      'function index:', i)
                min_x, min_f = func_first_order(type_f=type_fx_i,
                                                xishu_f=xishu_arr_i,
                                                point_f=point_round_i)
                x_min[i] = min_x
                func_min += min_f
            else:
                denpend_point_1D.append(i)
    '''
    Only correlated variables remain here. The remaining function indices are
    stored in denpend_point_1D (first-order functions that certainly share a
    variable with a second-order function) and second_order. Next, determine
    which second-order functions each first-order function shares variables
    with:
    1. find the shared terms among the second-order functions.
    '''
    # Handle the 2-D case, split into correlated and uncorrelated
    # second-order terms
    X = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
    ]
    # Check whether the second-order variables are correlated
    depend_2D, independ_2D = is_xiangguan_2D(second_order=second_order)
    print('depend_2D:', depend_2D)
    print('independ_2D:', independ_2D)

    # Determine the relation between the 1-D and 2-D terms.
    # Some first-order functions are correlated with second-order ones,
    # i.e. the second-order set is non-empty.
    if len(denpend_point_1D) != 0:
        # Two cases: a 1-D function relates to a correlated 2-D function, or
        # to an uncorrelated 2-D function.
        # 1. Build a 2-D array whose rows are the dependent first-order
        #    functions and whose columns are the dependent (or independent)
        #    second-order functions: denpend_point_1D x depend_2D
        if len(depend_2D) != 0:
            # Variables of the correlated 2-D functions: deduplicate the
            # second_order entries referenced by depend_2D
            unique_2D = []
            for i in range(len(depend_2D)):
                temp_0i = []
                for j in range(len(depend_2D[i])):
                    aaa = list(second_order[depend_2D[i][j]])
                    temp_0i.append(list(aaa))
                temp_0i = np.unique(temp_0i)
                unique_2D.append(temp_0i)
            # Scan each column; an entry of 1 means that the 1-D function
            # shares a variable with the corresponding 2-D function.
            flag_arr = np.array([[-1] * len(unique_2D)] * len(denpend_point_1D))
            for row in range(len(denpend_point_1D)):
                for col in range(len(unique_2D)):
                    if denpend_point_1D[row] in list(unique_2D[col]):
                        flag_arr[row, col] = 1
            # 2-D array recording the 1-D/2-D relations: in each inner list
            # the first element is the index of the 2-D function within
            # depend_2D, and every following element is a 1-D function.
            f1_f2 = []
            # Iterate over columns first...
            for col in range(len(depend_2D)):
                # ...then over rows
                f2D = [col]
                for row in range(len(denpend_point_1D)):
                    if flag_arr[row, col] == 1:
                        f2D.append(row)
                f1_f2.append(f2D)
            for row in range(len(f1_f2)):
                # The 1-D function shares variables with the 2-D function
                if len(f1_f2[row]) > 1:
                    print('Optimizing first-order functions together with '
                          'correlated second-order functions')
                    # Each element's label is both a function index and a
                    # variable index
                    f_1_depend = f1_f2[row][1:]
                    # To count how many variables the 1-D and 2-D functions
                    # use in total, union their variables and deduplicate.
                    # Build the coefficient matrix of the 2-D functions.
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(depend_2D[row])):
                        ij = second_order[depend_2D[row][i]]
                        # Locate the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                                index = j
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    len_ij = np.unique(np.array(ij_index_i))
                    # Array recording which dimensions' variables are used
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # First-order functions
                        for i in range(len(f_1_depend)):
                            type_fx_1 = type_fx[f_1_depend[i]]
                            xishu_arr_1 = xishu_arr[f_1_depend[i]]
                            point_round_1 = point_round[f_1_depend[i]]
                            point_index = -1
                            for x in range(len(len_ij)):
                                if f_1_depend[i] == len_ij[x]:
                                    point_index = x
                            x_name = X_name[point_index]
                            f_index += -(rbf_hdmr.func_1D_value(
                                x_name, type=type_fx_1, xishu=xishu_arr_1,
                                point_sample=point_round_1))
                        # Second-order functions
                        for index in range(len(depend_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name, index_ij=ij_index, xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    func_min += max_xy
                # The first-order functions are independent of this
                # correlated second-order function
                else:
                    print('No first-order function relates to this '
                          'correlated second-order function')
                    # f_2_x is the corresponding entry in depend_2D
                    f_2_x = depend_2D[f1_f2[row][0]]
                    # Build the coefficient matrix of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(depend_2D[row])):
                        ij = second_order[depend_2D[row][i]]
                        # Locate the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                                index = j
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    len_ij = np.unique(np.array(ij_index_i))
                    # Array recording which dimensions' variables are used
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        for index in range(len(depend_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            # Second-order function
                            x_name = [X_name[left], X_name[right]]
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name, index_ij=ij_index, xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    func_min += max_xy

        if len(independ_2D) != 0:
            # Variables of the uncorrelated 2-D functions: deduplicate the
            # second_order entries referenced by independ_2D
            unique_2D = []
            for i in range(len(independ_2D)):
                temp_0i = []
                for j in range(len(independ_2D[i])):
                    aaa = list(second_order[independ_2D[i][j]])
                    temp_0i.append(list(aaa))
                temp_0i = np.unique(temp_0i)
                unique_2D.append(temp_0i)
            # Scan each column; an entry of 1 means that the 1-D function
            # shares a variable with the corresponding 2-D function.
            flag_arr = np.array([[-1] * len(unique_2D)] * len(denpend_point_1D))
            for row in range(len(denpend_point_1D)):
                for col in range(len(unique_2D)):
                    if denpend_point_1D[row] in list(unique_2D[col]):
                        flag_arr[row, col] = 1
            # 2-D array recording the 1-D/2-D relations (same layout as above)
            f1_f2 = []
            # Iterate over columns first...
            for col in range(len(independ_2D)):
                # ...then over rows
                f2D = [col]
                for row in range(len(denpend_point_1D)):
                    if flag_arr[row, col] == 1:
                        f2D.append(row)
                f1_f2.append(f2D)
            for row in range(len(f1_f2)):
                # The 1-D function shares variables with the 2-D function
                if len(f1_f2[row]) > 1:
                    print('Optimizing first-order functions together with '
                          'uncorrelated second-order functions')
                    # f_2_x is the corresponding entry in independ_2D
                    f_2_x = independ_2D[f1_f2[row][0]]
                    # Each element's label is both a function index and a
                    # variable index
                    f_1_depend = f1_f2[row][1:]
                    # Union of the 1-D and 2-D variables (it suffices to know
                    # which variables the 2-D functions use)
                    f_1 = unique_2D[row]
                    # Build the coefficient matrix of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(independ_2D[row])):
                        ij = second_order[independ_2D[row][i]]
                        # Locate the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                                index = j
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    len_ij = np.unique(np.array(ij_index_i))
                    # Array recording which dimensions' variables are used
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # First-order functions
                        for i in range(len(f_1_depend)):
                            type_fx_1 = type_fx[f_1_depend[i]]
                            xishu_arr_1 = xishu_arr[f_1_depend[i]]
                            point_round_1 = point_round[f_1_depend[i]]
                            point_index = -1
                            for x in range(len(len_ij)):
                                if f_1_depend[i] == len_ij[x]:
                                    point_index = x
                            x_name = X_name[point_index]
                            f_index += -(rbf_hdmr.func_1D_value(
                                x_name, type=type_fx_1, xishu=xishu_arr_1,
                                point_sample=point_round_1))
                        # Second-order functions
                        for index in range(len(independ_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name, index_ij=ij_index, xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    func_min += max_xy
                # The first-order functions are independent of this
                # second-order function
                else:
                    print('Optimizing uncorrelated second-order functions')
                    # f_2_x is the corresponding entry in independ_2D
                    f_2_x = independ_2D[f1_f2[row][0]]
                    # Build the coefficient matrix of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(independ_2D[row])):
                        ij = second_order[independ_2D[row][i]]
                        # Locate the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                                index = j
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    len_ij = np.unique(np.array(ij_index_i))
                    # Array recording which dimensions' variables are used
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # Second-order functions
                        for index in range(len(independ_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name, index_ij=ij_index, xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    func_min += max_xy

    # Only second-order correlation problems remain
    elif len(denpend_point_1D) == 0:
        # Handle functions where 1-D and 2-D terms share no variables and the
        # 2-D terms are uncorrelated
        if len(independ_2D) != 0:
            print('Optimizing uncorrelated second-order variables')
            for i in range(len(independ_2D)):
                ij = second_order[independ_2D[i][0]]
                # Position in the correlation array
                index = -1
                for j in range(len(x_ij_index)):
                    if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                        index = j
                ij_index = x_ij_index[index]
                ij_xishu = x_ij_xishu[index]
                ij_point = x_ij_point[index]
                X_name = [X[ij_index[0]], X[ij_index[1]]]

                def f(X_name):
                    return -(rbf_hdmr.func_2D_value(X_name, index_ij=ij_index,
                                                    xishu=ij_xishu,
                                                    points=ij_point))

                param = OrderedDict()
                for m in range(len(ij_index)):
                    param[X_name[m]] = ('cont', x_round[ij_index[m]])
                sexp = matern52()
                gp = GaussianProcess(sexp)
                acq = Acquisition(mode='ExpectedImprovement')
                gpgo = GPGO(gp, acq, f, param)
                gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                res, max_xy = gpgo.getResult()
                for hiahia in range(len(ij_index)):
                    x_min[ij_index[hiahia]] = res[hiahia]
                func_min += max_xy
        # Handle correlated second-order variables
        if len(depend_2D) != 0:
            print('Optimizing correlated second-order variables')
            for i in range(len(depend_2D)):
                temp = depend_2D[i]
                ij_index_i = []
                ij_xishu_i = []
                ij_point_i = []
                for k in range(len(temp)):
                    ij = second_order[temp[k]]
                    # Locate the function coefficients
                    index = -1
                    for j in range(len(x_ij_index)):
                        if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                            index = j
                    ij_index_i.append(x_ij_index[index])
                    ij_xishu_i.append(x_ij_xishu[index])
                    ij_point_i.append(x_ij_point[index])
                len_ij = np.unique(np.array(ij_index_i))
                # Array recording which dimensions' variables are used
                X_name = []
                for x in range(len(len_ij)):
                    X_name.append(X[len_ij[x]])

                def f(X_name):
                    f_index = 0
                    for index in range(len(ij_index_i)):
                        ij_index = ij_index_i[index]
                        ij_xishu = ij_xishu_i[index]
                        ij_point = ij_point_i[index]
                        left = -1
                        right = -1
                        for x in range(len(len_ij)):
                            if ij_index[0] == len_ij[x]:
                                left = x
                            if ij_index[1] == len_ij[x]:
                                right = x
                        # Second-order function
                        x_name = [X_name[left], X_name[right]]
                        f_index += -(rbf_hdmr.func_2D_value(
                            x_name, index_ij=ij_index, xishu=ij_xishu,
                            points=ij_point))
                    return f_index

                param = OrderedDict()
                for m in range(len(len_ij)):
                    param[X_name[m]] = ('cont', x_round[len_ij[m]])
                sexp = matern52()
                gp = GaussianProcess(sexp)
                acq = Acquisition(mode='ExpectedImprovement')
                gpgo = GPGO(gp, acq, f, param)
                gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                res, max_xy = gpgo.getResult()
                for x in range(len(len_ij)):
                    x_min[len_ij[x]] = res[x]
                func_min += max_xy
    '''
    Only the cases where 1-D and 2-D functions are correlated remain, but it
    is not known which relates to which. What we do know is that Bayesian
    optimization works well in low dimensions (D <= 5), so the idea is:
    1. use the exact first/second-order functions (to be implemented);
    2. use the approximate first/second-order functions (used in this code).
    Three function classes:
      - only first-order functions (solved)
      - only second-order functions without shared variables (solved)
      - both first-order and second-order functions present
    '''
    return x_min, func_min, index_dimen
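# A hypothetical driver for func_model, assuming the module-level RBF-HDMR
# tables (type_fx, xishu_arr, point_round, x_ij_index, x_round, ...) have
# already been built; index_ij selects which first- and second-order
# component functions to optimize, while x_min/func_min carry the incumbent
# solution:
D = len(type_fx)
x_best, f_best, dims = func_model(index_ij=list(range(D)),
                                  x_min=[0.0] * D, func_min=0.0,
                                  max_iter_i=10, nstart_i=10)
print('x_best:', x_best, 'f_best:', f_best, 'dims:', dims)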
    # Measure the total execution time
    start_time = time()

    # Set up and run GPGO
    gpgo = GPGO(model, acquisition, objective_function, param, n_jobs=1)
    gpgo.run(max_iter=budget - n_initial_evaluation,
             init_evals=n_initial_evaluation)

    # Print the total time required
    print("Total execution time:", int(time() - start_time), "seconds")

    # Plot the history of the best value seen so far
    plt.plot(gpgo.history)
    plt.title('Accuracy vs Iterations')
    plt.ylabel('Accuracy (%)')
    plt.xlabel('# Iterations')
    plt.show()

    print("Best set of hyper-parameters found:")
    print(gpgo.getResult())
    return gpgo.history


model = RandomForest(n_estimators=n_estimators)
acquisition = Acquisition(mode='ProbabilityImprovement')
print("Surrogate Model: Random Forest")
print("Acquisition Function: Probability of Improvement")
test_history = SMBO(model, acquisition)
def hyperparam_search(  # type: ignore[override]
        self,
        params_dict: PARAM_DICT,
        train_dataset: Dataset,
        valid_dataset: Dataset,
        metric: Metric,
        use_max: bool = True,
        logdir: Optional[str] = None,
        max_iter: int = 20,
        search_range: Union[int, float, PARAM_DICT] = 4,
        logfile: Optional[str] = None):
    """Perform hyperparameter search using a gaussian process.

    Parameters
    ----------
    params_dict : Dict
        Maps hyperparameter names (strings) to possible parameter values. The
        semantics of this list are different than for `GridHyperparamOpt`.
        `params_dict[hp]` must map to an int/float, which is used as the
        center of a search with radius `search_range`, since pyGPGO can only
        optimize numerical hyperparameters.
    train_dataset : Dataset
        dataset used for training
    valid_dataset : Dataset
        dataset used for validation (optimization on valid scores)
    metric : Metric
        metric used for evaluation
    use_max : bool, (default True)
        Specifies whether to maximize (True) or minimize (False) `metric`.
    logdir : str, optional, (default None)
        The directory in which to store created models. If not set, will use
        a temporary directory.
    max_iter : int, (default 20)
        number of optimization trials
    search_range : int/float/Dict (default 4)
        The `search_range` specifies the range of parameter values to search
        for. If `search_range` is an int/float, it is used as the global
        search range for parameters, creating a search problem on the space

            [initial value / search_range, initial value * search_range]

        If `search_range` is a dict, it must contain the same keys as
        `params_dict`. In this case `search_range` specifies a per-parameter
        search range, which is useful when some parameters have a larger
        natural range than others. For a given hyperparameter `hp` this
        creates the search range

            [initial value[hp] / search_range[hp],
             initial value[hp] * search_range[hp]]
    logfile : str, optional (default None)
        Name of logfile to write results to. If specified, this must be a
        valid file. If not specified, results of the hyperparameter search
        will be written to `logdir/results.txt`.

    Returns
    -------
    Tuple[`best_model`, `best_hyperparams`, `all_scores`]
        `(best_model, best_hyperparams, all_scores)` where `best_model` is an
        instance of `dc.model.Model`, `best_hyperparams` is a dictionary of
        parameters, and `all_scores` is a dictionary mapping string
        representations of hyperparameter sets to validation scores.
    """
    try:
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
    except ModuleNotFoundError:
        raise ValueError("This class requires pyGPGO to be installed.")

    # Specify logfile
    log_file = None
    if logfile:
        log_file = logfile
    elif logdir is not None:
        # Make logdir if it doesn't exist.
        if not os.path.exists(logdir):
            os.makedirs(logdir, exist_ok=True)
        log_file = os.path.join(logdir, "results.txt")

    # Set up the search range
    param_range = compute_parameter_range(params_dict, search_range)
    param_keys = list(param_range.keys())

    # Stores all results
    all_results = {}
    # Stores all model references so we don't have to reload
    all_models = {}
    # Stores all model locations
    model_locations = {}

    # Demarcating internal function for readability
    def optimizing_function(**placeholders):
        """Private optimizing function.

        Takes in hyperparameter values and returns the validation set
        performance.

        Parameters
        ----------
        placeholders : keyword arguments
            Various hyperparameters as specified in `param_keys` above.

        Returns
        -------
        valid_scores : float
            validation set performance
        """
        hyper_parameters = {}
        for hp in param_keys:
            if param_range[hp][0] == "int":
                # Parameter values are always floats in BO, so convert to int
                # here; see https://github.com/josejimenezluna/pyGPGO/issues/10
                hyper_parameters[hp] = int(placeholders[hp])
            else:
                hyper_parameters[hp] = float(placeholders[hp])
        logger.info("Running hyperparameter set: %s" % str(hyper_parameters))
        if log_file:
            with open(log_file, 'w+') as f:
                # Record hyperparameters
                f.write("Parameters: %s" % str(hyper_parameters))
                f.write('\n')
        hp_str = _convert_hyperparam_dict_to_filename(hyper_parameters)

        if logdir is not None:
            filename = "model%s" % hp_str
            model_dir = os.path.join(logdir, filename)
            logger.info("model_dir is %s" % model_dir)
            try:
                os.makedirs(model_dir)
            except OSError:
                if not os.path.isdir(model_dir):
                    logger.info(
                        "Error creating model_dir, using tempfile directory")
                    model_dir = tempfile.mkdtemp()
        else:
            model_dir = tempfile.mkdtemp()
        # Add model_dir to the information needed for the constructor
        hyper_parameters["model_dir"] = model_dir
        model = self.model_builder(**hyper_parameters)
        model.fit(train_dataset)
        try:
            model.save()  # Some models autosave
        except NotImplementedError:
            pass

        multitask_scores = model.evaluate(valid_dataset, [metric])
        score = multitask_scores[metric.name]

        if log_file:
            with open(log_file, 'a') as f:
                # Record performance
                f.write("Score: %s" % str(score))
                f.write('\n')
        # Store all results
        all_results[hp_str] = score
        # Store a reference to the model
        all_models[hp_str] = model
        model_locations[hp_str] = model_dir
        # GPGO maximizes performance by default; return the negative score
        # for minimization
        if use_max:
            return score
        else:
            return -score

    # Execute GPGO
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, optimizing_function, param_range)
    logger.info("Max number of iterations: %i" % max_iter)
    gpgo.run(max_iter=max_iter)

    hp_opt, valid_performance_opt = gpgo.getResult()
    hyper_parameters = {}
    for hp in param_keys:
        if param_range[hp][0] == "int":
            hyper_parameters[hp] = int(hp_opt[hp])
        else:
            # FIXME: Incompatible types in assignment
            hyper_parameters[hp] = float(hp_opt[hp])  # type: ignore
    hp_str = _convert_hyperparam_dict_to_filename(hyper_parameters)

    # Fetch the model with the best parameters
    best_model = all_models[hp_str]

    if log_file:
        with open(log_file, 'a') as f:
            # Record hyperparameters
            f.write("params_dict:")
            f.write(str(params_dict))
            f.write('\n')

    # Return the best model, hyperparameters, and all scores
    return best_model, hyper_parameters, all_results
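# A worked example of the search-space construction described in the
# docstring, assuming compute_parameter_range keeps the (type, bounds)
# convention that optimizing_function reads; the hyperparameter values are
# hypothetical:
#
#   params_dict  = {'learning_rate': 1e-3, 'batch_size': 64}
#   search_range = 4
#   param_range  = {'learning_rate': ('cont', [2.5e-04, 4e-03]),
#                   'batch_size':    ('int',  [16, 256])}
#
# i.e. each initial value becomes the center of a multiplicative interval
# [value / search_range, value * search_range].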
    ax.set_title('Gaussian Process surrogate')
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, linewidth=0)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    best = gpgo.best
    ax.scatter([best[0]], [best[1]], s=40, marker='x', c='r',
               label='Sampled point')
    plt.legend(loc='lower right')
    # plt.show()
    return Z


if __name__ == '__main__':
    n_iter = 10
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {'x': ('cont', [0, 1]), 'y': ('cont', [0, 1])}
    np.random.seed(85)
    gpgo = GPGO(gp, acq, f, param)
    gpgo.run(max_iter=1)

    for i in range(n_iter):
        fig = plt.figure(figsize=plt.figaspect(0.5))
        fig.suptitle("Franke's function (Iteration {})".format(i + 1))
        gpgo.run(max_iter=1, resume=True)
        plotFranke()
        plotPred(gpgo)
        plt.show()
        # plt.savefig('/home/jose/gif/{}.png'.format(i), dpi=300)
CURRENT_EXP_DIR.split("_")[0] + "_0" + "/model/" + lang + "/corpus2.model") # Load binary truths binary_truth = np.loadtxt( "./data/" + lang + "/semeval2020_ulscd_" + lang[:3] + "/truth/binary.txt", dtype=str, delimiter="\t", ) # Creating a GP surrogate model with a Squared Exponantial # covariance function, aka kernel sexp = squaredExponential() sur_model = GaussianProcess(sexp) fitness = get_fitness_for_automl(model1, model2, binary_truth, logger) # setting the acquisition function acq = Acquisition(mode="ExpectedImprovement") # creating an object Bayesian Optimization bo = GPGO(sur_model, acq, fitness, param, n_jobs=4) bo._firstRun = functools.partial(myFirstRun, bo) bo.updateGP = functools.partial(myUpdateGP, bo) bo._firstRun(init_rand_configs=init_rand_configs) bo.logger._printInit(bo) bo.run(furtherEvaluations, resume=True) best = bo.getResult() logger.info("BEST PARAMETERS: " + ", ".join([k + ": " + str(v) for k, v in best[0].items()]) + ", ACCU: " + str(best[1])) logger.info("OPTIMIZATION HISTORY")
    fig = plt.figure()
    a = np.array([-gpgo._acqWrapper(np.atleast_1d(x)) for x in x_test]).flatten()
    r = fig.add_subplot(1, 1, 1)
    r.set_title('Acquisition function')
    plt.plot(x_test, a, color='green')
    gpgo._optimizeAcq(method='L-BFGS-B', n_start=25)
    plt.axvline(x=gpgo.best, color='black', label='Found optima')
    plt.legend(loc=0)
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    np.random.seed(321)

    def f(x):
        return np.sin(x)

    sexp = squaredExponential()
    gp = GaussianProcessMCMC(sexp, step=pm.Slice)
    acq = Acquisition(mode='IntegratedExpectedImprovement')
    param = {'x': ('cont', [0, 2 * np.pi])}
    gpgo = GPGO(gp, acq, f, param, n_jobs=-1)
    gpgo._firstRun()

    for i in range(6):
        plotGPGO(gpgo, param)
        gpgo.updateGP()
str(best_param["test_MSE"]) + '\n') # GPGO maximize performance by default, set performance to its negative value for minimization if direction: return best_param["test_MSE"] else: return -best_param["test_MSE"] from pyGPGO.covfunc import matern32 from pyGPGO.acquisition import Acquisition from pyGPGO.surrogates.GaussianProcess import GaussianProcess from pyGPGO.GPGO import GPGO cov = matern32() gp = GaussianProcess(cov) acq = Acquisition(mode='UCB') param = { 'radius': ('int', [2, 6]), 'T': ('int', [1, 5]), 'fingerprint_dim': ('int', [30, 300]), 'weight_decay': ('cont', [2, 6]), 'learning_rate': ('cont', [2, 5]), 'p_dropout': ('cont', [0, 0.5]) } np.random.seed(168) gpgo = GPGO(gp, acq, f, param) gpgo.run(max_iter=30, init_evals=2) # hp_opt, valid_performance_opt = gpgo.getResult()
# defining a dictionary on "x" param = { 'C': ('cont', [0.1, 5]), 'gamma': ('cont', [0.1, 10]), 'coef0': ('cont', [0.1, 10]) } # creating a GP surrogate model with a Squared Exponantial covariance function, # aka kernel sexp = squaredExponential() sur_model_1 = GaussianProcess(sexp) sur_model_2 = RandomForest() # setting the acquisition function acq_1 = Acquisition(mode="ExpectedImprovement") acq_2 = Acquisition(mode="ProbabilityImprovement") acq_3 = Acquisition(mode="UCB") # creating an object Bayesian Optimization gpgo_gaussian_model_1 = GPGO(sur_model_1, acq_1, compute_accuracy_SVC, param) gpgo_gaussian_model_2 = GPGO(sur_model_1, acq_2, compute_accuracy_SVC, param) gpgo_gaussian_model_3 = GPGO(sur_model_1, acq_3, compute_accuracy_SVC, param) gpgo_random_forest_1 = GPGO(sur_model_2, acq_1, compute_accuracy_SVC, param) gpgo_random_forest_2 = GPGO(sur_model_2, acq_2, compute_accuracy_SVC, param) gpgo_random_forest_3 = GPGO(sur_model_2, acq_3, compute_accuracy_SVC, param) #Run models gaussianModel_1_start = timeit.default_timer() gpgo_gaussian_model_1.run(max_iter=furtherEvaluations,
def zoom_scan():
    # Create a message box so the user can choose whether to refine the scan
    choice = QtGui.QMessageBox.question(
        self, 'Precision Scan', "Do you want to get a more accurate point?",
        QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
    if choice == QtGui.QMessageBox.Yes:
        S_ini = int(execution(ser, '?CNT4'))  # Motor 4
        Y_ini = int(execution(ser, '?CNT5'))  # ?CNTn -> read the position value of axis n
        Z_ini = int(execution(ser, '?CNT6'))
        print(S_ini, Y_ini, Z_ini)

        # pyGPGO provides the Bayesian optimization machinery
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO

        # convert_str_int() converts µm to steps and vice versa; the user
        # chooses the focus range
        if c == 1:
            range_focus = convert_str_int(self.range_focus.text(), 1)
        else:
            range_focus = convert_str_int(self.range_focus.text(), 20)
        intTval = convert_str_int(self.intT.text(), 1000)
        spec.integration_time_micros(intTval)

        def scan_S(S, Y, Z):
            # This time we also move along the S axis
            mouve(4, S_ini + S, 'ABSOL')
            mouve(5, Y_ini + Y, 'ABSOL')
            mouve(6, Z_ini + Z, 'ABSOL')
            # Wait for the end of the movement
            while execution(ser, "?ASTAT") != "RRRRRRUUU":
                time.sleep(0.1)
            l = spec.intensities()  # Read all intensities of the spectrum at this point
            s = slice(1500, 2100)
            m = slice(2500, 3500)
            threshold = np.std(l[m])
            print(mean(l[m]), max(l[s]))
            if max(l[s]) > 3 * mean(l[m]):  # Works like the other threshold
                return max(l[s])  # Return the highest intensity
            else:
                return max(l[s]) / 3  # Damp intensities below the threshold

        cov = matern32()  # Choice of covariance function
        gp = GaussianProcess(cov, optimize=True, usegrads=True)
        acq = Acquisition(mode='ExpectedImprovement')
        # Set the ranges for S, Y and Z
        param = {'S': ('cont', [-(3 / 5) * range_focus, (2 / 5) * range_focus]),
                 'Z': ('cont', [-diameterZ * 0.1, diameterZ * 0.1]),
                 'Y': ('cont', [-diameterY * 0.1, diameterY * 0.1])}
        # The user chooses the number of iterations of the Bayesian process
        N_baye = convert_str_int(self.N_baye.text(), 1)
        gpgo = GPGO(gp, acq, scan_S, param)
        gpgo.run(max_iter=N_baye)  # Launch the Bayesian process

        mouve(5, Y_ini + gpgo.getResultY(), 'ABSOL')
        mouve(6, Z_ini + gpgo.getResultZ(), 'ABSOL')
        mouve(4, S_ini + gpgo.getResultS(), 'ABSOL')
        while execution(ser, "?ASTAT") != "RRRRRRUUU":
            time.sleep(0.1)

        # Display all the results on the GUI
        if c == 1:
            self.Yscan.setText(str(-1 * round(Y_ini - gpgo.getResultY(), 2)))
            self.Zscan.setText(str(round(Z_ini - gpgo.getResultZ(), 2)))
            self.Imax.setText(str(round(gpgo.getResultI(), 0)))
            self.Smax.setText(str(round(S_ini + gpgo.getResultS(), 2)))
        else:
            self.Yscan.setText(str(-1 * round((Y_ini - gpgo.getResultY()) / 20, 2)))
            self.Zscan.setText(str(round((Z_ini - gpgo.getResultZ()) / 40, 2)))
            self.Imax.setText(str(round(gpgo.getResultI(), 0)))
            self.Smax.setText(str(round((S_ini + gpgo.getResultS()) / 20, 2)))
    else:
        pass
def Bayesian(self):
    from statistics import mean
    spec = sb.Spectrometer(devices[0])
    intTval = convert_str_int(self.intT.text(), 1000)
    spec.integration_time_micros(intTval)
    if c == 1:
        diameterZ = convert_str_int(self.Di.text(), 2)
        diameterY = convert_str_int(self.Di.text(), 1)
    else:
        diameterZ = convert_str_int(self.Di.text(), 40)
        diameterY = convert_str_int(self.Di.text(), 20)
    print("Diameter Z and Y :", diameterY, diameterZ)
    S_ini = int(execution(ser, '?CNT4'))  # Motor 4
    Y_ini = int(execution(ser, '?CNT5'))
    Z_ini = int(execution(ser, '?CNT6'))
    print(S_ini, Y_ini, Z_ini)

    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO

    if c == 1:
        range_focus = convert_str_int(self.range_focus.text(), 1)
    else:
        range_focus = convert_str_int(self.range_focus.text(), 20)
    intTval = convert_str_int(self.intT.text(), 1000)
    spec.integration_time_micros(intTval)

    def scan_F(S, Y, Z):
        mouve(4, S_ini + S, 'ABSOL')
        mouve(5, Y_ini + Y, 'ABSOL')
        mouve(6, Z_ini + Z, 'ABSOL')
        while execution(ser, "?ASTAT") != "RRRRRRUUU":
            time.sleep(0.1)
        l = spec.intensities()
        s = slice(1500, 2100)
        m = slice(2500, 3500)
        threshold = np.std(l[m])
        if max(l[s]) > 3 * threshold:
            return max(l[s])
        else:
            return None

    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {'S': ('cont', [-(4 / 5) * range_focus, (4 / 5) * range_focus]),
             'Z': ('cont', [-diameterZ / 2, diameterZ / 2]),
             'Y': ('cont', [-diameterY / 2, diameterY / 2])}
    N_baye = convert_str_int(self.N_baye.text(), 1)
    gpgo = GPGO(gp, acq, scan_F, param)
    gpgo.run(max_iter=N_baye)
    mouve(5, Y_ini + gpgo.getResultY(), 'ABSOL')
    mouve(6, Z_ini + gpgo.getResultZ(), 'ABSOL')
    mouve(4, S_ini + gpgo.getResultS(), 'ABSOL')
    while execution(ser, "?ASTAT") != "RRRRRRUUU":
        time.sleep(0.1)
    if c == 1:
        self.Yscan.setText(str(-1 * round(Y_ini - gpgo.getResultY(), 2)))
        self.Zscan.setText(str(round(Z_ini - gpgo.getResultZ(), 2)))
        self.Imax.setText(str(round(gpgo.getResultI(), 0)))
        self.Smax.setText(str(round(S_ini + gpgo.getResultS(), 2)))
    else:
        self.Yscan.setText(str(-1 * round((Y_ini - gpgo.getResultY()) / 20, 2)))
        self.Zscan.setText(str(round((Z_ini - gpgo.getResultZ()) / 40, 2)))
        self.Imax.setText(str(round(gpgo.getResultI(), 0)))
        self.Smax.setText(str(round((S_ini + gpgo.getResultS()) / 20, 2)))
    spec.close()


devices = []
def test_acq():
    for mode in modes:
        acq = Acquisition(mode=mode)
        acq.eval(tau, mean, std)
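# test_acq above and test_acq_mcmc further below rely on module-level
# fixtures that are not part of this excerpt. A minimal sketch that makes
# them runnable, assuming scalar posterior moments for test_acq and arrays of
# MCMC posterior samples for test_acq_mcmc (the mode lists are a
# conservative, assumed subset of pyGPGO's acquisition modes):
import numpy as np
from pyGPGO.acquisition import Acquisition

modes = ['ExpectedImprovement', 'ProbabilityImprovement']
modes_mcmc = ['IntegratedExpectedImprovement', 'IntegratedProbabilityImprovement']
tau = 0.5                     # best objective value observed so far
mean, std = 0.3, 1.0          # GP posterior mean/std at a candidate point
means = np.random.randn(10)   # posterior means across MCMC hyperparameter draws
stds = np.ones(10)            # posterior stds across the same draws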
def Scan(self):
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO

    spec.integration_time_micros(100 * 1000)

    def scan_f(Z, Y):
        l1 = spec.intensities()
        print("\n")
        print("Before :", int(Y), int(Z), int(positionvalue(5)),
              int(positionvalue(6)), int(max(l1)))
        # mouve(4, S, 'ABSOL')
        mouve(5, Y, 'ABSOL')
        mouve(6, Z, 'ABSOL')
        if positionvalue(5) != Y and positionvalue(6) != Z:
            time.sleep(0.6)
        l2 = spec.intensities()
        lw = spec.wavelengths()
        W_m = lw[np.array(l2).argmax()]
        print("After : ", int(Y), int(Z), "Y =", int(positionvalue(5)),
              "Z =", int(positionvalue(6)), "Imax =", int(max(l2)))
        print("Wavelength at Imax:", W_m)
        return max(l2)

    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {'Z': ('cont', [0, 120 * 35]), 'Y': ('cont', [0, 120 * 20])}
    # np.random.seed(20)
    gpgo = GPGO(gp, acq, scan_f, param)
    gpgo.run(max_iter=10)
    gpgo.getResultZ()
    # print("Z max :", gpgo.getResultZ())
    gpgo.getResultY()
    print("Y Max:", gpgo.getResultY())
    print(gpgo.getResult())
    mouve(5, gpgo.getResultY(), 'ABSOL')
    mouve(6, gpgo.getResultZ(), 'ABSOL')

    def scan_S(S, Y, Z):
        execution(ser, 'CRES4')  # CRESn -> reset current position for an axis
        execution(ser, 'CRES5')
        execution(ser, 'CRES6')
        mouve(4, S, 'RELAT')
        mouve(5, Y, 'RELAT')
        mouve(6, Z, 'RELAT')
        if positionvalue(4) != S:
            time.sleep(0.2)
        l = spec.intensities()
        return max(l)

    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ProbabilityImprovement')
    param = {
        'S': ('cont', [-150, 150]),
        'Z': ('cont', [-50, 50]),
        'Y': ('cont', [-50, 50])
    }
    # np.random.seed(20)
    gpgo = GPGO(gp, acq, scan_S, param)
    gpgo.run(max_iter=10)
    mouve(5, gpgo.getResultY(), 'ABSOL')
    mouve(6, gpgo.getResultZ(), 'ABSOL')
    mouve(4, gpgo.getResultS(), 'ABSOL')
from pyGPGO.GPGO import GPGO
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.acquisition import Acquisition
from pyGPGO.covfunc import squaredExponential


def evaluateModel(C, gamma):
    clf = SVC(C=10 ** C, gamma=10 ** gamma)
    return np.average(cross_val_score(clf, X, y))


if __name__ == '__main__':
    np.random.seed(20)
    X, y = make_moons(n_samples=200, noise=0.3)
    cm_bright = ListedColormap(['#fc4349', '#6dbcdb'])
    fig = plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    plt.show()

    sexp = squaredExponential()
    gp = GaussianProcess(sexp, optimize=True, usegrads=True)
    acq = Acquisition(mode='UCB', beta=1.5)
    params = {'C': ('cont', (-4, 5)), 'gamma': ('cont', (-4, 5))}

    gpgo = GPGO(gp, acq, evaluateModel, params)
    gpgo.run(max_iter=50)
    gpgo.getResult()
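    # Since evaluateModel searches over exponents (C = 10**C, gamma = 10**gamma),
    # the optimum returned by getResult() can be mapped back to the actual SVC
    # hyperparameters; a small decoding sketch:
    best_params, best_score = gpgo.getResult()
    print('Best C = {:.4g}, gamma = {:.4g}, CV accuracy = {:.4f}'.format(
        10 ** best_params['C'], 10 ** best_params['gamma'], best_score))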
    plt.plot(x_test.flatten(), y_hat)
    plt.subplot(5, 1, index)
    a = np.array([-gpgo._acqWrapper(np.atleast_1d(x)) for x in x_test]).flatten()
    plt.plot(x_test, a, color=colors[index - 2], label=acq_titles[index - 2])
    gpgo._optimizeAcq(method='L-BFGS-B', n_start=1000)
    plt.axvline(x=gpgo.best)
    plt.legend(loc=0)


if __name__ == '__main__':
    def f(x):
        return np.sin(x)

    acq_1 = Acquisition(mode='ExpectedImprovement')
    acq_2 = Acquisition(mode='ProbabilityImprovement')
    acq_3 = Acquisition(mode='UCB', beta=0.5)
    acq_4 = Acquisition(mode='UCB', beta=1.5)
    acq_list = [acq_1, acq_2, acq_3, acq_4]
    sexp = squaredExponential()
    param = {'x': ('cont', [0, 2 * np.pi])}
    new = True
    colors = ['green', 'red', 'orange', 'black']
    acq_titles = [
        r'Expected improvement', r'Probability of Improvement',
        r'GP-UCB $\beta = .5$', r'GP-UCB $\beta = 1.5$'
    ]
    for index, acq in enumerate(acq_list):
        np.random.seed(200)
def hyperparam_search(
        self,
        params_dict,
        train_dataset,
        valid_dataset,
        output_transformers,
        metric,
        prot_desc_dict,
        prot_desc_length,
        tasks=None,
        direction=True,
        n_features=1024,
        n_tasks=1,
        max_iter=20,
        search_range=4,
        early_stopping=True,
        evaluate_freq=3,
        patience=3,
        model_dir="./model_dir",
        hp_invalid_list=[
            'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts',
            'n_pair_feat', 'fit_transformers', 'min_child_weight',
            'weight_init_stddevs', 'max_delta_step', 'subsample',
            'colsample_bylevel', 'bias_init_consts', 'colsample_bytree',
            'reg_alpha', 'reg_lambda', 'scale_pos_weight', 'base_score',
            'layer_sizes'
        ],
        log_file='GPhypersearch.log',
        mode='classification',
        tensorboard=True,
        no_concordance_index=False,
        no_r2=False,
        plot=False,
        verbose_search=False,
        aggregated_tasks=[]):
    """Perform hyperparameter search using a gaussian process assumption.

    params_dict includes the single-valued parameters being optimized, which
    should only contain int, float and lists of int (float). Parameters whose
    names appear in hp_invalid_list will not be changed.

    For MoleculeNet models, self.model_class is the model name as a string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
        dict of parameters and their initial values; parameters not suitable
        for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
        dataset used for training
    valid_dataset: dc.data.Dataset struct
        dataset used for validation (optimization on valid scores)
    output_transformers: list of dc.trans.Transformer
        transformers for evaluation
    metric: list of dc.metrics.Metric
        metric used for evaluation
    direction: bool
        maximization (True) or minimization (False)
    n_features: int
        number of input features
    n_tasks: int
        number of tasks
    max_iter: int
        number of optimization trials
    search_range: int (float)
        optimization on [initial values / search_range,
                         initial values * search_range]
    hp_invalid_list: list
        names of parameters that should not be optimized
    log_file: string
        name of the log file; hyperparameters and results for each trial will
        be recorded

    Returns
    -------
    hyper_parameters: dict
        params_dict with all optimized values
    valid_performance_opt: float
        best performance on the validation dataset
    """
    # assert len(metric) == 1, 'Only use one metric'
    hyper_parameters = params_dict
    hp_list = list(hyper_parameters.keys())
    for hp in hp_invalid_list:
        if hp in hp_list:
            hp_list.remove(hp)

    hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
    assert set(hp_list_class) <= set([list, int, float])

    # Float or int hyperparameters (e.g. batch_size, learning_rate)
    hp_list_single = [
        hp_list[i] for i in range(len(hp_list)) if hp_list_class[i] is not list
    ]
    # List hyperparameters of float or int (e.g. layer_sizes)
    hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                        for i in range(len(hp_list))
                        if hp_list_class[i] is list]

    # Number of parameters
    n_param = len(hp_list_single)
    if len(hp_list_multiple) > 0:
        n_param = n_param + sum([hp[1] for hp in hp_list_multiple])

    # Range of optimization
    param_range = []
    for hp in hp_list_single:
        if hyper_parameters[hp].__class__ is int:
            param_range.append(('int', [
                hyper_parameters[hp] // search_range,
                hyper_parameters[hp] * search_range
            ]))
        else:
            param_range.append(('cont', [
                hyper_parameters[hp] / search_range,
                hyper_parameters[hp] * search_range
            ]))
    for hp in hp_list_multiple:
        if hyper_parameters[hp[0]][0].__class__ is int:
            param_range.extend([('int', [
                hyper_parameters[hp[0]][i] // search_range,
                hyper_parameters[hp[0]][i] * search_range
            ]) for i in range(hp[1])])
        else:
            param_range.extend([('cont', [
                hyper_parameters[hp[0]][i] / search_range,
                hyper_parameters[hp[0]][i] * search_range
            ]) for i in range(hp[1])])

    # Dummy names
    param_name = ['l' + format(i, '02d') for i in range(20)]
    param = dict(zip(param_name[:n_param], param_range))

    data_dir = './logs'
    log_file = os.path.join(data_dir, log_file)

    def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0,
          l09=0, l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0,
          l18=0, l19=0):
        """Optimizing function.

        Takes in hyperparameter values and returns the validation set
        performance.

        Parameters
        ----------
        l00~l19: int or float
            placeholders for the hyperparameters being optimized; the
            hyper_parameters dict is rebuilt from the placeholder values

        Returns
        -------
        valid_scores: float
            validation set performance
        """
        args = locals()
        # Input hyperparameters
        i = 0
        for hp in hp_list_single:
            hyper_parameters[hp] = float(args[param_name[i]])
            if param_range[i][0] == 'int':
                hyper_parameters[hp] = int(hyper_parameters[hp])
            i = i + 1
        for hp in hp_list_multiple:
            hyper_parameters[hp[0]] = [
                float(args[param_name[j]]) for j in range(i, i + hp[1])
            ]
            if param_range[i][0] == 'int':
                hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
            i = i + hp[1]

        opt_epoch = -1
        print(hyper_parameters)
        nonlocal model_dir
        # Run benchmark
        with open(log_file, 'a') as f:
            # Record hyperparameters
            f.write(str(hyper_parameters))
            f.write('\n')
        if isinstance(self.model_class, str):
            if mode == 'classification':
                train_scores, valid_scores, _, opt_epoch = model_classification(
                    train_dataset,
                    valid_dataset,
                    valid_dataset,
                    tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    prot_desc_dict,
                    prot_desc_length,
                    hyper_parameters=hyper_parameters,
                    early_stopping=early_stopping,
                    evaluate_freq=evaluate_freq,
                    patience=patience,
                    direction=direction,
                    model_dir=model_dir,
                    tensorboard=tensorboard,
                    no_concordance_index=no_concordance_index,
                    verbose_search=verbose_search,
                    log_file=log_file,
                    no_r2=no_r2,
                    aggregated_tasks=aggregated_tasks)
            elif mode == 'regression' or mode == 'reg-threshold':
                train_scores, valid_scores, _, opt_epoch = model_regression(
                    train_dataset,
                    valid_dataset,
                    valid_dataset,
                    tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    prot_desc_dict,
                    prot_desc_length,
                    hyper_parameters=hyper_parameters,
                    early_stopping=early_stopping,
                    evaluate_freq=evaluate_freq,
                    patience=patience,
                    direction=direction,
                    model_dir=model_dir,
                    tensorboard=tensorboard,
                    no_concordance_index=no_concordance_index,
                    verbose_search=verbose_search,
                    log_file=log_file,
                    no_r2=no_r2,
                    aggregated_tasks=aggregated_tasks)
            else:
                raise ValueError("Invalid mode!")
            # Similar to the fit() function in tensor_graph.py, we also use a
            # combination of scores here.
            if n_tasks > 1:
                val_scores = valid_scores[self.model_class]['averaged']
            else:
                val_scores = valid_scores[self.model_class]
            score = 0
            if mode == 'regression':
                for mtc in metric:
                    mtc_name = mtc.metric.__name__
                    composite_mtc_name = mtc.name
                    if mtc_name == 'rms_score':
                        score += val_scores[composite_mtc_name]
                    if mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score':
                        coef = 0.0 if no_r2 else -0.5
                        score += coef * val_scores[composite_mtc_name]
                    if mtc_name == 'concordance_index':
                        score += -val_scores[composite_mtc_name]
            elif mode == 'reg-threshold' or mode == 'classification':
                for mtc in metric:
                    mtc_name = mtc.metric.__name__
                    composite_mtc_name = mtc.name
                    if mtc_name == 'roc_auc_score':
                        score += val_scores[composite_mtc_name]
                    if mtc_name == 'prc_auc_score':
                        score += val_scores[composite_mtc_name]
        else:
            model_dir = tempfile.mkdtemp()
            model = self.model_class(hyper_parameters, model_dir)
            model.fit(train_dataset, **hyper_parameters)
            model.save()
            evaluator = Evaluator(model, valid_dataset, output_transformers)
            multitask_scores = evaluator.compute_model_performance([metric])
            score = multitask_scores[metric.name]

        if early_stopping:
            best_score = opt_epoch[1]
            opt_epoch = opt_epoch[0]
            epoch_stmt = str(opt_epoch) + " is the optimum number of epochs found."
            print(epoch_stmt)
            with open(log_file, 'a') as f:
                # Record performance
                f.write(self.model_class)
                f.write('\n')
                f.write(epoch_stmt)
                f.write('\n')
                f.write(str(score))
                f.write('\n')
                f.write(str(best_score))
                f.write('\n')
        if not early_stopping:
            best_score = score
        # GPGO maximizes performance by default; return the negative value
        # for minimization
        if direction:
            return best_score
        else:
            return -best_score

    import pyGPGO
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO

    with open(log_file, 'a') as file:
        file.write("------------------------------------------------------------------")
        file.write('\n')

    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, f, param)
    print("Max number of iterations: %i" % max_iter)
    gpgo.run(max_iter=max_iter)
    hp_opt, valid_performance_opt = gpgo.getResult()

    # Read out the best hyperparameters
    i = 0
    for hp in hp_list_single:
        hyper_parameters[hp] = float(hp_opt[param_name[i]])
        if param_range[i][0] == 'int':
            hyper_parameters[hp] = int(hyper_parameters[hp])
        i = i + 1
    for hp in hp_list_multiple:
        hyper_parameters[hp[0]] = [
            float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
        ]
        if param_range[i][0] == 'int':
            hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
        i = i + hp[1]

    opt_epoch = -1
    # Compare the best model against the default hyperparameters
    with open(log_file, 'a') as f:
        # Record hyperparameters
        f.write(str(params_dict))
        f.write('\n')
    if isinstance(self.model_class, str):
        if mode == 'classification':
            train_scores, valid_scores, _, opt_epoch = model_classification(
                train_dataset,
                valid_dataset,
                valid_dataset,
                tasks,
                output_transformers,
                n_features,
                metric,
                self.model_class,
                prot_desc_dict,
                prot_desc_length,
                hyper_parameters=params_dict,
                early_stopping=early_stopping,
                evaluate_freq=evaluate_freq,
                patience=patience,
                direction=direction,
                model_dir=model_dir,
                tensorboard=tensorboard,
                no_concordance_index=no_concordance_index,
                verbose_search=verbose_search,
                log_file=log_file,
                no_r2=no_r2,
                aggregated_tasks=aggregated_tasks)
        elif mode == 'regression' or mode == 'reg-threshold':
            train_scores, valid_scores, _, opt_epoch = model_regression(
                train_dataset,
                valid_dataset,
                valid_dataset,
                tasks,
                output_transformers,
                n_features,
                metric,
                self.model_class,
                prot_desc_dict,
                prot_desc_length,
                hyper_parameters=params_dict,
                early_stopping=early_stopping,
                evaluate_freq=evaluate_freq,
                patience=patience,
                direction=direction,
                model_dir=model_dir,
                tensorboard=tensorboard,
                no_concordance_index=no_concordance_index,
                verbose_search=verbose_search,
                log_file=log_file,
                no_r2=no_r2,
                aggregated_tasks=aggregated_tasks)
        else:
            raise ValueError("Invalid mode!")
        if n_tasks > 1:
            val_scores = valid_scores[self.model_class]['averaged']
        else:
            val_scores = valid_scores[self.model_class]
        score = 0
        if mode == 'regression':
            for mtc in metric:
                mtc_name = mtc.metric.__name__
                composite_mtc_name = mtc.name
                if mtc_name == 'rms_score':
                    score += val_scores[composite_mtc_name]
                if mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score':
                    coef = 0.0 if no_r2 else -0.5
                    score += coef * val_scores[composite_mtc_name]
                if mtc_name == 'concordance_index':
                    score += -val_scores[composite_mtc_name]
        elif mode == 'reg-threshold' or mode == 'classification':
            for mtc in metric:
                mtc_name = mtc.metric.__name__
                composite_mtc_name = mtc.name
                if mtc_name == 'roc_auc_score':
                    score += val_scores[composite_mtc_name]
                if mtc_name == 'prc_auc_score':
                    score += val_scores[composite_mtc_name]

    if early_stopping:
        best_score = opt_epoch[1]
        opt_epoch = opt_epoch[0]
        epoch_stmt = str(opt_epoch) + " is the optimum number of epochs found."
        print(epoch_stmt)
        with open(log_file, 'a') as f:
            f.write(epoch_stmt)
            f.write('\n')
            # Record performance
            f.write(str(score))
            f.write('\n')
            f.write(str(best_score))
            f.write('\n')
    if not early_stopping:
        best_score = score

    # The comparison criterion is best_score rather than score.
    if not direction:
        best_score = -best_score
    if best_score > valid_performance_opt:
        # The default model is better; return the default hyperparameters
        return params_dict, best_score
    # Return the optimized hyperparameters
    return hyper_parameters, valid_performance_opt
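# How f() above unpacks the dummy placeholder names l00..l19: a scalar
# hyperparameter occupies one slot and a list hyperparameter occupies
# len(list) consecutive slots. A hypothetical illustration with
# search_range=4 (the names and values are not from the original):
#
#   hyper_parameters = {'learning_rate': 1e-3, 'hidden_sizes': [1000, 500]}
#   hp_list_single   = ['learning_rate']     -> placeholder l00
#   hp_list_multiple = [('hidden_sizes', 2)] -> placeholders l01, l02
#   param = {'l00': ('cont', [2.5e-04, 4e-03]),
#            'l01': ('int', [250, 4000]),
#            'l02': ('int', [125, 2000])}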
def test_acq_mcmc():
    for mode in modes_mcmc:
        acq = Acquisition(mode=mode)
        print(acq.eval(tau, means, stds))
def Scan(self):
    # pyGPGO is the module used to perform the Bayesian optimization.
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO

    # Diameters are derived from the user's input; 35 and 20 are the
    # conversion factors for the Z and Y axes respectively.
    # convert_str_int() converts a string and multiplies it by its second argument.
    diameterZ = convert_str_int(self.Di.text(), 35)
    diameterY = convert_str_int(self.Di.text(), 20)
    # This value is halved because the search interval will be
    # [-range_focus, range_focus].
    range_focus = convert_str_int(self.focus.text(), 0.5)
    # The integration time is fixed; the user is not given a choice here.
    spec.integration_time_micros(100 * 1000)

    def scan_f(Z, Y):
        mouve(5, Y, 'ABSOL')
        mouve(6, Z, 'ABSOL')
        # Wait until every axis reports that it has stopped moving.
        while execution(ser, "?ASTAT") != "RRRRRRUUU":
            time.sleep(0.1)
        l2 = spec.intensities()
        lw = spec.wavelengths()
        W_m = lw[np.array(l2).argmax()]  # wavelength at maximum intensity
        # Return the maximum intensity of the acquired spectrum.
        return max(l2)

    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ExpectedImprovement')
    # Search intervals for Z and Y.
    param = {
        'Z': ('cont', [0, diameterZ]),
        'Y': ('cont', [0, diameterY])
    }
    gpgo = GPGO(gp, acq, scan_f, param)
    # Launch the optimization with the requested number of iterations.
    gpgo.run(max_iter=10)
    print("Z max :", gpgo.getResultZ())
    print("Y max :", gpgo.getResultY())
    print(gpgo.getResult())
    # Move the cell to the coordinates of the ruby.
    mouve(5, gpgo.getResultY(), 'ABSOL')
    mouve(6, gpgo.getResultZ(), 'ABSOL')
    # CRESn resets the current position of axis n. Resetting the origin of
    # every axis is always necessary to return to the ruby's coordinates
    # after the next optimization.
    execution(ser, 'CRES4')
    execution(ser, 'CRES5')
    execution(ser, 'CRES6')

    def scan_S(S, Y, Z):
        # This time we also move in depth, along the S axis.
        mouve(4, S, 'RELAT')
        mouve(5, Y, 'RELAT')
        mouve(6, Z, 'RELAT')
        # Wait for the movement to finish.
        while execution(ser, "?ASTAT") != "RRRRRRUUU":
            time.sleep(0.1)
        l = spec.intensities()  # acquire the spectrum
        # Return the maximum intensity of the acquired spectrum.
        return max(l)

    cov = matern32()
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ProbabilityImprovement')
    # Search intervals for S, Z and Y.
    param = {
        'S': ('cont', [-range_focus, range_focus]),
        'Z': ('cont', [-50, 50]),
        'Y': ('cont', [-50, 50])
    }
    gpgo = GPGO(gp, acq, scan_S, param)
    # Launch the algorithm, which runs for 10 iterations.
    gpgo.run(max_iter=10)
    mouve(5, gpgo.getResultY(), 'ABSOL')
    mouve(6, gpgo.getResultZ(), 'ABSOL')
    mouve(4, gpgo.getResultS(), 'ABSOL')
    # Measure the optimization time (`start` is set when the scan begins).
    end = time.time()
    print(end - start)
    # Display all results on the graphical interface.
    self.Yscan.setText(str(round(-1 * gpgo.getResultY(), 2)))
    self.Zscan.setText(str(round(gpgo.getResultZ(), 2)))
    self.Imax.setText(str(round(gpgo.getResultI(), 0)))
    self.Smax.setText(str(round(gpgo.getResultS(), 2)))
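# convert_str_int() is used above but not defined in this snippet. A minimal
# sketch, assuming (per the original comments) that it parses the text of a
# line edit and multiplies it by its second argument; the fallback value on
# a parse failure is an assumption.
def convert_str_int(text, factor):
    """Hypothetical helper: parse `text` as a number and scale it by `factor`."""
    try:
        return float(text) * factor
    except ValueError:
        return 0.0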
def hyperparam_search(self,
                      params_dict,
                      train_dataset,
                      valid_dataset,
                      output_transformers,
                      metric,
                      direction=True,
                      n_features=1024,
                      n_tasks=1,
                      max_iter=20,
                      search_range=4,
                      hp_invalid_list=[
                          'seed', 'nb_epoch', 'penalty_type', 'dropouts',
                          'bypass_dropouts', 'n_pair_feat', 'fit_transformers',
                          'min_child_weight', 'max_delta_step', 'subsample',
                          'colsample_bylevel', 'colsample_bytree', 'reg_alpha',
                          'reg_lambda', 'scale_pos_weight', 'base_score'
                      ],
                      log_file='GPhypersearch.log'):
    """Perform hyperparameter search under a Gaussian process assumption.

    params_dict holds the single-valued parameters being optimized; it should
    only contain int, float, and list-of-int(float) values. Parameters named
    in hp_invalid_list will not be changed. For MoleculeNet models,
    self.model_class is the model name as a string and
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class].

    Parameters
    ----------
    params_dict: dict
        Parameters and their initial values. Parameters not suitable for
        optimization can be added to hp_invalid_list.
    train_dataset: dc.data.Dataset
        Dataset used for training.
    valid_dataset: dc.data.Dataset
        Dataset used for validation (optimization targets valid scores).
    output_transformers: list of dc.trans.Transformer
        Transformers for evaluation.
    metric: list of dc.metrics.Metric
        Metric used for evaluation.
    direction: bool
        Maximization (True) or minimization (False).
    n_features: int
        Number of input features.
    n_tasks: int
        Number of tasks.
    max_iter: int
        Number of optimization trials.
    search_range: int or float
        Optimize over [initial value / search_range, initial value * search_range].
    hp_invalid_list: list
        Names of parameters that should not be optimized.
    log_file: string
        Name of the log file; hyperparameters and results of each trial are
        recorded there.

    Returns
    -------
    hyper_parameters: dict
        params_dict with all optimized values.
    valid_performance_opt: float
        Best performance on the validation dataset.
    """
    assert len(metric) == 1, 'Only use one metric'
    # Copy the defaults so params_dict stays intact for the final comparison.
    hyper_parameters = dict(params_dict)
    hp_list = list(hyper_parameters.keys())
    for hp in hp_invalid_list:
        if hp in hp_list:
            hp_list.remove(hp)

    hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
    assert set(hp_list_class) <= set([list, int, float])

    # Float or int hyperparameters (e.g. batch_size, learning_rate)
    hp_list_single = [
        hp_list[i] for i in range(len(hp_list)) if hp_list_class[i] is not list
    ]
    # Lists of float or int hyperparameters (e.g. layer_sizes)
    hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                        for i in range(len(hp_list))
                        if hp_list_class[i] is list]

    # Number of parameters
    n_param = len(hp_list_single)
    if len(hp_list_multiple) > 0:
        n_param = n_param + sum([hp[1] for hp in hp_list_multiple])

    # Range of optimization
    param_range = []
    for hp in hp_list_single:
        if hyper_parameters[hp].__class__ is int:
            param_range.append(('int', [
                hyper_parameters[hp] // search_range,
                hyper_parameters[hp] * search_range
            ]))
        else:
            param_range.append(('cont', [
                hyper_parameters[hp] / search_range,
                hyper_parameters[hp] * search_range
            ]))
    for hp in hp_list_multiple:
        if hyper_parameters[hp[0]][0].__class__ is int:
            param_range.extend([('int', [
                hyper_parameters[hp[0]][i] // search_range,
                hyper_parameters[hp[0]][i] * search_range
            ]) for i in range(hp[1])])
        else:
            param_range.extend([('cont', [
                hyper_parameters[hp[0]][i] / search_range,
                hyper_parameters[hp[0]][i] * search_range
            ]) for i in range(hp[1])])

    # Dummy names: pyGPGO optimizes named keyword arguments, so each scalar
    # slot is bound to a placeholder l00..l19.
    param_name = ['l' + format(i, '02d') for i in range(20)]
    param = dict(zip(param_name[:n_param], param_range))

    data_dir = os.environ['DEEPCHEM_DATA_DIR']
    log_file = os.path.join(data_dir, log_file)

    def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0,
          l09=0, l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0,
          l18=0, l19=0):
        """Optimizing function.

        Takes hyperparameter values and returns the valid-set performance.

        Parameters
        ----------
        l00~l19: int or float
            Placeholders for the hyperparameters being optimized; the
            hyper_parameters dict is rebuilt from the placeholder values.

        Returns
        -------
        valid_scores: float
            Valid-set performance.
        """
        args = locals()
        # Rebuild hyper_parameters from the placeholder values.
        i = 0
        for hp in hp_list_single:
            hyper_parameters[hp] = float(args[param_name[i]])
            if param_range[i][0] == 'int':
                hyper_parameters[hp] = int(hyper_parameters[hp])
            i = i + 1
        for hp in hp_list_multiple:
            hyper_parameters[hp[0]] = [
                float(args[param_name[j]]) for j in range(i, i + hp[1])
            ]
            if param_range[i][0] == 'int':
                hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
            i = i + hp[1]

        logger.info(hyper_parameters)
        # Run benchmark
        with open(log_file, 'a') as fh:
            # Record hyperparameters
            fh.write(str(hyper_parameters))
            fh.write('\n')
        if isinstance(self.model_class, str):
            try:
                train_scores, valid_scores, _ = benchmark_classification(
                    train_dataset, valid_dataset, valid_dataset,
                    ['task_placeholder'] * n_tasks, output_transformers,
                    n_features, metric, self.model_class,
                    hyper_parameters=hyper_parameters)
            except AssertionError:
                train_scores, valid_scores, _ = benchmark_regression(
                    train_dataset, valid_dataset, valid_dataset,
                    ['task_placeholder'] * n_tasks, output_transformers,
                    n_features, metric, self.model_class,
                    hyper_parameters=hyper_parameters)
            score = valid_scores[self.model_class][metric[0].name]
        else:
            model_dir = tempfile.mkdtemp()
            model = self.model_class(hyper_parameters, model_dir)
            model.fit(train_dataset, **hyper_parameters)
            model.save()
            evaluator = Evaluator(model, valid_dataset, output_transformers)
            multitask_scores = evaluator.compute_model_performance(metric)
            score = multitask_scores[metric[0].name]

        with open(log_file, 'a') as fh:
            # Record performances
            fh.write(str(score))
            fh.write('\n')
        # GPGO maximizes by default; negate the score for minimization.
        if direction:
            return score
        else:
            return -score

    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, f, param)
    logger.info("Max number of iterations: %i" % max_iter)
    gpgo.run(max_iter=max_iter)

    hp_opt, valid_performance_opt = gpgo.getResult()
    # Read out the best hyperparameters
    i = 0
    for hp in hp_list_single:
        hyper_parameters[hp] = float(hp_opt[param_name[i]])
        if param_range[i][0] == 'int':
            hyper_parameters[hp] = int(hyper_parameters[hp])
        i = i + 1
    for hp in hp_list_multiple:
        hyper_parameters[hp[0]] = [
            float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
        ]
        if param_range[i][0] == 'int':
            hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
        i = i + hp[1]

    # Compare the best model to the default hyperparameters
    with open(log_file, 'a') as fh:
        # Record hyperparameters
        fh.write(str(params_dict))
        fh.write('\n')
    if isinstance(self.model_class, str):
        try:
            train_scores, valid_scores, _ = benchmark_classification(
                train_dataset, valid_dataset, valid_dataset,
                ['task_placeholder'] * n_tasks, output_transformers,
                n_features, metric, self.model_class,
                hyper_parameters=params_dict)
        except AssertionError:
            train_scores, valid_scores, _ = benchmark_regression(
                train_dataset, valid_dataset, valid_dataset,
                ['task_placeholder'] * n_tasks, output_transformers,
                n_features, metric, self.model_class,
                hyper_parameters=params_dict)
        score = valid_scores[self.model_class][metric[0].name]
        with open(log_file, 'a') as fh:
            # Record performances
            fh.write(str(score))
            fh.write('\n')
        if not direction:
            score = -score
        if score > valid_performance_opt:
            # The default hyperparameters perform better; return them.
            return params_dict, score

    # Return the optimized hyperparameters
    return hyper_parameters, valid_performance_opt
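# A standalone sketch of the dummy-name mapping used above, with hypothetical
# default hyperparameters: every scalar slot (including each element of a
# list-valued hyperparameter such as layer_sizes) is bound to a placeholder
# l00, l01, ..., because pyGPGO optimizes named keyword arguments only.
defaults = {'batch_size': 50, 'learning_rate': 0.001, 'layer_sizes': [1000, 500]}
search_range = 4
param = {}
slot = 0
for name, value in defaults.items():
    for v in (value if isinstance(value, list) else [value]):
        kind = 'int' if isinstance(v, int) else 'cont'
        low = v // search_range if kind == 'int' else v / search_range
        param['l' + format(slot, '02d')] = (kind, [low, v * search_range])
        slot += 1
print(param)
# -> {'l00': ('int', [12, 200]), 'l01': ('cont', [0.00025, 0.004]),
#     'l02': ('int', [250, 4000]), 'l03': ('int', [125, 2000])}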
""" Plots Franke's function """ x = np.linspace(0, 1, num=1000) y = np.linspace(0, 1, num=1000) X, Y = np.meshgrid(x, y) Z = f(X, Y) fig = plt.figure() ax = fig.gca(projection='3d') surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, linewidth=0) fig.colorbar(surf, shrink=0.5, aspect=5) plt.show() if __name__ == '__main__': plotFranke() cov = matern32() # Using a matern v=3/2 covariance kernel gp = GaussianProcess(cov) # A Gaussian Process regressor without hyperparameter optimization acq = Acquisition(mode='ExpectedImprovement') # Expected Improvement acquisition function param = {'x': ('cont', [0, 1]), 'y': ('cont', [0, 1])} # Specify parameter space np.random.seed(1337) gpgo = GPGO(gp, acq, f, param) # Call GPGO class gpgo.run(max_iter=10) # 10 iterations gpgo.getResult() # Get your result