def object_comparison():
    import torch
    from torch.autograd import Variable
    from HyperSphere.GP.models.gp_regression import GPRegression
    from HyperSphere.GP.kernels.modules.matern52 import Matern52
    from HyperSphere.GP.kernels.modules.squared_exponential import SquaredExponentialKernel
    from HyperSphere.BO.shadow_inference.inference_slide_both import ShadowInference as si1
    from HyperSphere.BO.shadow_inference.inference_slide_origin import ShadowInference as si2

    model1 = GPRegression(Matern52(3))
    model2 = GPRegression(SquaredExponentialKernel(3))
    a = si1((Variable(torch.randn(10, 3)), Variable(torch.randn(10, 3))), model1)
    # Re-instantiating through a.__class__ must give back exactly the same class object.
    b = a.__class__((a.train_x, a.train_y), model1)
    print(a.__class__ is b.__class__)
    print(a.__class__)
    print(b.__class__)

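# Minimal invocation sketch (not part of the original file): object_comparison()
# is self-contained, so it can be run directly as a script.
if __name__ == '__main__':
    object_comparison()
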
def __init__(self, ndim, original2cube=None, cube2original=None):
    super(NonArdSpearmintBayesianOptimization, self).__init__(original2cube, cube2original)
    # Cube geometry: non-ARD Matern 5/2 surrogate, plain GP inference, EI acquisition.
    self.surrogate_model = GPRegression(kernel=Matern52(ndim=ndim, ard=False))
    self.surrogate_inference = Inference
    self.acquisition_function = expected_improvement

def __init__(self, ndim, original2cube=None, cube2original=None):
    super(CylindricalKernelBayesianOptimization, self).__init__(original2cube, cube2original)
    # Sphere geometry: the search ball of radius sqrt(ndim) circumscribes the [-1, 1]^ndim cube.
    self.radius = ndim ** 0.5
    radius_input_map = Kumaraswamy(ndim=1, max_input=self.radius)
    self.surrogate_model = GPRegression(kernel=RadializationKernel(
        max_power=3, search_radius=self.radius, radius_input_map=radius_input_map))
    self.surrogate_inference = origin_ShadowInference
    self.acquisition_function = expected_improvement

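# A hedged usage sketch, not from the repo: the two constructors above share one
# signature, so switching between cube (non-ARD Matern) and sphere (cylindrical
# kernel) geometry is a drop-in change. The None defaults for the coordinate maps
# are kept; any concrete original2cube/cube2original callables are assumptions.
ndim = 20
for bo in (NonArdSpearmintBayesianOptimization(ndim=ndim),
           CylindricalKernelBayesianOptimization(ndim=ndim)):
    print(bo.surrogate_model.kernel.__class__.__name__,
          bo.surrogate_inference.__name__,
          bo.acquisition_function.__name__)
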
import math

import torch
from torch.autograd import Variable

from HyperSphere.GP.kernels.modules.matern52 import Matern52
from HyperSphere.GP.models.gp_regression import GPRegression
from HyperSphere.GP.inference.inference import Inference
from HyperSphere.BO.acquisition.acquisition_maximization import acquisition
from HyperSphere.feature_map.functionals import x_radial, id_transform
# Which shadow-inference variant this script used is not visible in the source;
# inference_slide_origin is one variant imported elsewhere in the repo (an assumption).
from HyperSphere.BO.shadow_inference.inference_slide_origin import ShadowInference

ndata = 10
ndim = 2
search_radius = ndim ** 0.5
x_input = Variable(torch.FloatTensor(ndata, ndim).uniform_(-1, 1))
x_input.data[0, :] = 0
x_input.data[1, :] = 1
output = torch.cos(x_input[:, 0:1] + (x_input[:, 1:2] / math.pi * 0.5)
                   + torch.prod(x_input, 1, keepdim=True))
reference = torch.min(output).data.squeeze()[0]
train_data = (x_input, output)

# A rectangular (cube) model versus two spherical models using the radial feature map.
model_rect = GPRegression(kernel=Matern52(ndim, id_transform))
kernel_input_map = x_radial
model_sphere1 = GPRegression(kernel=Matern52(kernel_input_map.dim_change(ndim), kernel_input_map))
model_sphere2 = GPRegression(kernel=Matern52(kernel_input_map.dim_change(ndim), kernel_input_map))

inference_rect = Inference((x_input, output), model_rect)
inference_sphere1 = Inference((x_input, output), model_sphere1)
inference_sphere2 = ShadowInference((x_input, output), model_sphere2)
inference_rect.model_param_init()
inference_sphere1.model_param_init()
inference_sphere2.model_param_init()

params_rect = inference_rect.learning(n_restarts=10)
params_sphere1 = inference_sphere1.learning(n_restarts=10)

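# Hedged continuation (not in the source, which stops after the second learning
# call): finish learning for the shadow model and compare the three fits by
# negative log likelihood, using the Inference API shown in the tests below.
params_sphere2 = inference_sphere2.learning(n_restarts=10)
for name, inf in [('rect', inference_rect), ('sphere', inference_sphere1),
                  ('sphere+shadow', inference_sphere2)]:
    print('%s NLL : %.4E' % (name, inf.negative_log_likelihood().data[0, 0]))
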
def BO(n_eval=200, path=None, func=None, ndim=None):
    # Exactly one of a previous experiment path or a fresh objective must be given.
    assert (path is None) != (func is None)
    if path is not None:
        # Resume a stored experiment: reload the model and the pickled state.
        if not os.path.exists(path):
            path = os.path.join(EXPERIMENT_DIR, path)
        model_filename = os.path.join(path, 'model.pt')
        data_config_filename = os.path.join(path, 'data_config.pkl')
        model = torch.load(model_filename)
        data_config_file = open(data_config_filename, 'r')
        for key, value in pickle.load(data_config_file).iteritems():
            exec(key + '=value')
        data_config_file.close()
        inference = ShadowInference((x_input, output), model)
    else:
        # Fresh experiment: the objective carries its own dimension unless ndim is given.
        assert (func.dim == 0) != (ndim is None)
        if ndim is None:
            ndim = func.dim
        dir_list = [elm for elm in os.listdir(EXPERIMENT_DIR)
                    if os.path.isdir(os.path.join(EXPERIMENT_DIR, elm))]
        folder_name = (func.__name__ + '_D' + str(ndim) + '_' + exp_str + '_'
                       + datetime.now().strftime('%Y%m%d-%H:%M:%S:%f'))
        os.makedirs(os.path.join(EXPERIMENT_DIR, folder_name))
        model_filename = os.path.join(EXPERIMENT_DIR, folder_name, 'model.pt')
        data_config_filename = os.path.join(EXPERIMENT_DIR, folder_name, 'data_config.pkl')

        search_sphere_radius = ndim ** 0.5
        # Two initial points: the origin and the (-1, ..., -1) corner.
        x_input = Variable(torch.ger(-torch.arange(0, 2), torch.ones(ndim)))
        output = Variable(torch.zeros(x_input.size(0), 1))
        for i in range(x_input.size(0)):
            output[i] = func(x_input[i])

        model = GPRegression(kernel=RadializationKernel(
            max_power=3, search_radius=search_sphere_radius))

        time_list = [time.time()] * 2
        elapse_list = [0, 0]
        pred_mean_list = [0, 0]
        pred_std_list = [0, 0]
        pred_var_list = [0, 0]
        pred_stdmax_list = [1, 1]
        pred_varmax_list = [1, 1]
        reference_list = [output.data.squeeze()[0]] * 2
        refind_list = [1, 1]
        dist_to_ref_list = [0, 0]
        sample_info_list = [(10, 0, 10)] * 2

        inference = ShadowInference((x_input, output), model)
        inference.init_parameters()
        inference.sampling(n_sample=1, n_burnin=99, n_thin=1)

    # Everything in the local scope except bookkeeping handles is checkpointed.
    stored_variable_names = locals().keys()
    ignored_variable_names = ['n_eval', 'path', 'data_config_file', 'dir_list',
                              'folder_name', 'next_ind', 'model_filename',
                              'data_config_filename', 'i', 'kernel_input_map',
                              'model', 'inference']
    stored_variable_names = set(stored_variable_names).difference(set(ignored_variable_names))

    bnd = radial_bound(search_sphere_radius)

    for _ in range(3):
        print('Experiment based on data in ' + os.path.split(model_filename)[0])

    for _ in range(n_eval):
        inference = ShadowInference((x_input, output), model)

        reference, ref_ind = torch.min(output, 0)
        reference = reference.data.squeeze()[0]
        gp_hyper_params = inference.sampling(n_sample=10, n_burnin=0, n_thin=1)
        inferences = deepcopy_inference(inference, gp_hyper_params)

        x0_cand = optimization_candidates(x_input, output, -1, 1)
        x0, sample_info = optimization_init_points(x0_cand, inferences, reference=reference)
        next_x_point, pred_mean, pred_std, pred_var, pred_stdmax, pred_varmax = suggest(
            inferences, x0=x0, bounds=bnd, reference=reference)

        time_list.append(time.time())
        elapse_list.append(time_list[-1] - time_list[-2])
        pred_mean_list.append(pred_mean.squeeze()[0])
        pred_std_list.append(pred_std.squeeze()[0])
        pred_var_list.append(pred_var.squeeze()[0])
        pred_stdmax_list.append(pred_stdmax.squeeze()[0])
        pred_varmax_list.append(pred_varmax.squeeze()[0])
        reference_list.append(reference)
        refind_list.append(ref_ind.data.squeeze()[0] + 1)
        dist_to_ref_list.append(torch.sum((next_x_point - x_input[ref_ind].data) ** 2) ** 0.5)
        sample_info_list.append(sample_info)

        x_input = torch.cat([x_input, Variable(next_x_point)], 0)
        output = torch.cat([output, func(x_input[-1])])

        min_ind = torch.min(output, 0)[1]
        min_loc = x_input[min_ind]
        min_val = output[min_ind]
        dist_to_suggest = torch.sum((x_input - x_input[-1]).data ** 2, 1) ** 0.5
        dist_to_min = torch.sum((x_input - min_loc).data ** 2, 1) ** 0.5
        out_of_box = torch.sum((torch.abs(x_input.data) > 1), 1)
        print('')
        for i in range(x_input.size(0)):
            time_str = (time.strftime('%H:%M:%S', time.gmtime(time_list[i]))
                        + '(' + time.strftime('%H:%M:%S', time.gmtime(elapse_list[i])) + ') ')
            data_str = ('%3d-th : %+12.4f(R:%8.4f[%4d]/ref:[%3d]%8.4f), '
                        'sample([%2d] best:%2d/worst:%2d), '
                        'mean : %+.4E, std : %.4E(%5.4f), var : %.4E(%5.4f), '
                        '2ownMIN : %8.4f, 2curMIN : %8.4f, 2new : %8.4f'
                        % (i + 1, output.data.squeeze()[i],
                           torch.sum(x_input.data[i] ** 2) ** 0.5, out_of_box[i],
                           refind_list[i], reference_list[i],
                           sample_info_list[i][2], sample_info_list[i][0], sample_info_list[i][1],
                           pred_mean_list[i], pred_std_list[i],
                           pred_std_list[i] / pred_stdmax_list[i],
                           pred_var_list[i], pred_var_list[i] / pred_varmax_list[i],
                           dist_to_ref_list[i], dist_to_min[i], dist_to_suggest[i]))
            min_str = ' <========= MIN' if i == min_ind.data.squeeze()[0] else ''
            print(time_str + data_str + min_str)
        print(model.kernel.__class__.__name__)

        # Checkpoint the model and all bookkeeping after every evaluation.
        torch.save(model, model_filename)
        stored_variable = dict()
        for key in stored_variable_names:
            stored_variable[key] = locals()[key]
        f = open(data_config_filename, 'w')
        pickle.dump(stored_variable, f)
        f.close()

    for _ in range(3):
        print('Experiment based on data in ' + os.path.split(model_filename)[0])

    return os.path.split(model_filename)[0]

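# Hedged usage sketch: BO() either starts a fresh experiment from a callable or
# resumes a saved one from its directory. `camelback` is a hypothetical benchmark
# here; any objective exposing a `dim` attribute (0 meaning "take ndim from the
# caller") satisfies the asserts above.
exp_dir = BO(n_eval=50, func=camelback, ndim=2)  # fresh run, 50 evaluations
BO(n_eval=25, path=exp_dir)                      # resume, 25 more evaluations
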
    # next_point = suggest(inference, param_samples_sampling, reference=reference).numpy()
    # ax2.fill_between(pred_x.numpy().flatten(), 0, acq.numpy().flatten(),
    #                  color=color, alpha=0.2, label=label)
    ax2.plot(pred_x.numpy(), acq.numpy(), color=color, ls=ls, alpha=1.0, label=label)
    ax2.legend()
    # ax2.axvline(next_point, color=color, ls='--', alpha=0.5)


if __name__ == '__main__':
    from HyperSphere.GP.kernels.modules.squared_exponential import SquaredExponentialKernel
    from HyperSphere.GP.models.gp_regression import GPRegression
    from HyperSphere.GP.inference.inference import Inference
    import matplotlib.pyplot as plt

    ndata = 6
    ndim = 1
    model_for_generating = GPRegression(kernel=SquaredExponentialKernel(ndim))
    train_x = Variable(torch.FloatTensor(ndata, ndim).uniform_(-2, 2))
    # Draw a sample from the generating GP: y = mean + L * eps, where K + noise = L L^T.
    chol_L = torch.potrf(
        (model_for_generating.kernel(train_x)
         + torch.diag(model_for_generating.likelihood(train_x))).data, upper=False)
    train_y = model_for_generating.mean(train_x) + Variable(torch.mm(chol_L, torch.randn(ndata, 1)))
    # train_y = torch.sin(2 * math.pi * torch.sum(train_x, 1, keepdim=True)) \
    #           + Variable(torch.FloatTensor(train_x.size(0), 1).normal_())
    train_data = (train_x, train_y)
    param_original = model_for_generating.param_to_vec()
    reference = torch.min(train_y.data)

    model_for_learning = GPRegression(kernel=SquaredExponentialKernel(ndim))
    inference = Inference(train_data, model_for_learning)
    model_for_learning.vec_to_param(param_original)
    param_samples_learning = inference.learning(n_restarts=10)
    model_for_learning.vec_to_param(param_original)
    param_samples_sampling = inference.sampling(n_sample=5, n_burnin=200, n_thin=10)
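    # Hedged follow-up (not in the original script): param_to_vec()/vec_to_param()
    # round-trip the hyperparameters, so the learned vector can be compared with
    # the generating one directly.
    print('generating parameters :', param_original)
    print('learned parameters    :', model_for_learning.param_to_vec())
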
import math

import numpy as np
import torch
from torch.autograd import Variable

from HyperSphere.GP.kernels.modules.radialization import RadializationKernel
from HyperSphere.GP.models.gp_regression import GPRegression
from HyperSphere.GP.inference.inference import Inference
from HyperSphere.BO.acquisition.acquisition_maximization import acquisition
# Assumed variant: the source does not show which ShadowInference this script imports.
from HyperSphere.BO.shadow_inference.inference_slide_origin import ShadowInference

ndata = 3
ndim = 2
search_radius = ndim ** 0.5
x_input = Variable(torch.FloatTensor(ndata, ndim).uniform_(-1, 1))
x_input.data[0, :] = 0
x_input.data[1, :] = 1
output = torch.cos(x_input[:, 0:1] + (x_input[:, 1:2] / math.pi * 0.5)
                   + torch.prod(x_input, 1, keepdim=True))
reference = torch.min(output).data.squeeze()[0]
train_data = (x_input, output)

model_normal = GPRegression(kernel=RadializationKernel(3, search_radius))
model_shadow = GPRegression(kernel=RadializationKernel(3, search_radius))

inference_normal = Inference((x_input, output), model_normal)
inference_shadow = ShadowInference((x_input, output), model_shadow)
inference_normal.init_parameters()
inference_shadow.init_parameters()
params_normal = inference_normal.learning(n_restarts=5)
# Reuse the hyperparameters learned by the plain inference for the shadow one.
inference_shadow.cholesky_update(model_normal.param_to_vec())

if ndim == 2:
    x1_grid, x2_grid = np.meshgrid(np.linspace(-1, 1, 50), np.linspace(-1, 1, 50))
    # The source is truncated here; stacking the grid into an (N, 2) float tensor
    # of prediction inputs is an assumed completion.
    x_pred_points = Variable(torch.from_numpy(
        np.stack([x1_grid.flatten(), x2_grid.flatten()], axis=1)).float())

    # Shade the +/- 1 std and +/- 1.96 std predictive bands; the first call is
    # reconstructed from its truncated tail in the source.
    ax.fill_between(pred_x.numpy().flatten(),
                    (pred_mean - pred_std).numpy().flatten(),
                    (pred_mean + pred_std).numpy().flatten(), alpha=0.2)
    ax.fill_between(pred_x.numpy().flatten(),
                    (pred_mean - 1.96 * pred_std).numpy().flatten(),
                    (pred_mean + 1.96 * pred_std).numpy().flatten(), alpha=0.2)
    ax.set_title(title_str + '\n%.4E' % nll)


if __name__ == '__main__':
    from HyperSphere.GP.kernels.modules.squared_exponential import SquaredExponentialKernel
    from HyperSphere.GP.models.gp_regression import GPRegression
    import matplotlib.pyplot as plt

    ndata = 20
    ndim = 1
    model_for_generating = GPRegression(kernel=SquaredExponentialKernel(ndim))
    train_x = Variable(torch.FloatTensor(ndata, ndim).uniform_(-2, 2))
    chol_L = torch.potrf(
        (model_for_generating.kernel(train_x)
         + torch.diag(model_for_generating.likelihood(train_x))).data, upper=False)
    train_y = model_for_generating.mean(train_x) + Variable(torch.mm(chol_L, torch.randn(ndata, 1)))
    train_data = (train_x, train_y)
    param_original = model_for_generating.param_to_vec()
    # Negative log likelihood under the generating parameters, used as a baseline.
    generated_nll = Inference(train_data, model_for_generating).negative_log_likelihood().data[0, 0]

    model_for_learning = GPRegression(kernel=SquaredExponentialKernel(ndim))
    inference = Inference(train_data, model_for_learning)
    model_for_learning.vec_to_param(param_original)
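    # Hedged continuation: optimize from the generating parameters and compare
    # the fitted negative log likelihood against the baseline computed above.
    inference.learning(n_restarts=10)
    learned_nll = inference.negative_log_likelihood().data[0, 0]
    print('NLL : generated %.4E / learned %.4E' % (generated_nll, learned_nll))
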
def BO(geometry=None, n_eval=200, path=None, func=None, ndim=None,
       boundary=False, ard=False, origin=False, warping=False, parallel=False):
    assert (path is None) != (func is None)
    if path is None:
        assert (func.dim == 0) != (ndim is None)
        assert geometry is not None
        if ndim is None:
            ndim = func.dim
        exp_conf_str = geometry
        if geometry == 'sphere':
            # Sphere geometry uses the cylindrical (radialization) kernel; ARD does not apply.
            assert not ard
            exp_conf_str += 'warping' if warping else ''
            radius_input_map = Kumaraswamy(ndim=1, max_input=ndim ** 0.5) if warping else None
            model = GPRegression(kernel=RadializationKernel(
                max_power=3, search_radius=ndim ** 0.5, radius_input_map=radius_input_map))
            inference_method = None
            if origin and boundary:
                inference_method = both_ShadowInference
                exp_conf_str += 'both'
            elif origin:
                inference_method = origin_ShadowInference
                exp_conf_str += 'origin'
            elif boundary:
                inference_method = satellite_ShadowInference
                exp_conf_str += 'boundary'
            else:
                inference_method = Inference
                exp_conf_str += 'none'
            bnd = sphere_bound(ndim ** 0.5)
        elif geometry == 'cube':
            # Cube geometry uses a Matern 5/2 kernel; the origin treatment does not apply.
            assert not origin
            exp_conf_str += ('ard' if ard else '') + ('boundary' if boundary else '')
            model = GPRegression(kernel=Matern52(ndim=ndim, ard=ard))
            inference_method = satellite_ShadowInference if boundary else Inference
            bnd = (-1, 1)

        if not os.path.isdir(EXPERIMENT_DIR):
            raise ValueError('In file : ' + os.path.realpath(__file__)
                             + '\nEXPERIMENT_DIR variable is not properly assigned. Please check it.')
        dir_list = [elm for elm in os.listdir(EXPERIMENT_DIR)
                    if os.path.isdir(os.path.join(EXPERIMENT_DIR, elm))]
        folder_name = (func.__name__ + '_D' + str(ndim) + '_' + exp_conf_str + '_'
                       + datetime.now().strftime('%Y%m%d-%H:%M:%S:%f'))
        os.makedirs(os.path.join(EXPERIMENT_DIR, folder_name))
        logfile_dir = os.path.join(EXPERIMENT_DIR, folder_name, 'log')
        os.makedirs(logfile_dir)
        model_filename = os.path.join(EXPERIMENT_DIR, folder_name, 'model.pt')
        data_config_filename = os.path.join(EXPERIMENT_DIR, folder_name, 'data_config.pkl')

        # Initial design: the origin plus one uniform random point in [-1, 1]^ndim.
        x_input = Variable(torch.stack([torch.zeros(ndim),
                                        torch.FloatTensor(ndim).uniform_(-1, 1)]))
        if func.func_name == 'stochastic_depth_resnet_cifar100':
            special_init_point = torch.FloatTensor([
                -0.88672996375809265, -0.83845553984377363, -0.80082455589209434, -0.76868080609344613,
                -0.74002860499719103, -0.71384507914214379, -0.6895229479156415, -0.66666666534211871,
                -0.64500158781765049, -0.62432778870160499, -0.60449429448743319, -0.58538383736427368,
                -0.56690311453886821, -0.54897644926147593, -0.53154137077618735, -0.51454570980003023,
                -0.49794520561122835, -0.4817019618876005, -0.46578329447738975, -0.45016063464220946,
                -0.43480887900991927, -0.41970588594137237, -0.40483184457290511, -0.39016909932337462,
                -0.37570168000845294, -0.36141512736958714, -0.34729635533386094, -0.33333334161175654,
                -0.31951507564952675, -0.30583136944490208, -0.29227292909996905, -0.27883100126437665,
                -0.26549747264739709, -0.25226475894331168, -0.23912574658399377, -0.22607369983030123,
                -0.2131023835975443, -0.20020577167418563, -0.18737817967669568, -0.1746141913340078,
                -0.16190858934371632, -0.14925649319813961, -0.13665309066289877, -0.12409378040195429,
                -0.11157411163518405, -0.099089726169870107, -0.086636502479268351, -0.074210299199806373,
                -0.061807101474520065, -0.049422967019945307, -0.037054013082912562, -0.024696364163967699,
                -0.012346298973719083, 0
            ])
            # The special point is a single row; reshape it before concatenating.
            x_input = torch.cat([x_input, Variable(special_init_point).view(1, -1)])
        n_init_eval = x_input.size(0)
        output = Variable(torch.zeros(n_init_eval, 1))
        for i in range(n_init_eval):
            output[i] = func(x_input[i])
        time_list = [time.time()] * n_init_eval
        elapse_list = [0] * n_init_eval
        pred_mean_list = [0] * n_init_eval
        pred_std_list = [0] * n_init_eval
        pred_var_list = [0] * n_init_eval
        pred_stdmax_list = [1] * n_init_eval
        pred_varmax_list = [1] * n_init_eval
        reference_list = [output.data.squeeze()[0]] * n_init_eval
        refind_list = [1] * n_init_eval
        dist_to_ref_list = [0] * n_init_eval
        sample_info_list = [(10, 0, 10)] * n_init_eval

        inference = inference_method((x_input, output), model)
        inference.init_parameters()
        inference.sampling(n_sample=1, n_burnin=99, n_thin=1)
    else:
        # Resume a stored experiment: reload the model and the pickled state.
        if not os.path.exists(path):
            path = os.path.join(EXPERIMENT_DIR, path)
        logfile_dir = os.path.join(path, 'log')
        model_filename = os.path.join(path, 'model.pt')
        data_config_filename = os.path.join(path, 'data_config.pkl')
        model = torch.load(model_filename)
        data_config_file = open(data_config_filename, 'r')
        for key, value in pickle.load(data_config_file).iteritems():
            if key != 'logfile_dir':
                exec(key + '=value')
        data_config_file.close()

    # Everything in the local scope except bookkeeping handles is checkpointed.
    ignored_variable_names = ['n_eval', 'path', 'i', 'key', 'value', 'logfile_dir',
                              'n_init_eval', 'data_config_file', 'dir_list',
                              'folder_name', 'model_filename', 'data_config_filename',
                              'kernel', 'model', 'inference', 'parallel', 'pool']
    stored_variable_names = set(locals().keys()).difference(set(ignored_variable_names))

    if path is None:
        torch.save(model, model_filename)
        stored_variable = dict()
        for key in stored_variable_names:
            stored_variable[key] = locals()[key]
        f = open(data_config_filename, 'w')
        pickle.dump(stored_variable, f)
        f.close()

    print('Experiment based on data in %s' % os.path.split(model_filename)[0])

    # multiprocessing conflicts with pytorch linear algebra operation
    pool = multiprocessing.Pool(N_INIT) if parallel else None

    for _ in range(n_eval):
        start_time = time.time()
        logfile = open(os.path.join(logfile_dir, str(x_input.size(0) + 1).zfill(4) + '.out'), 'w')
        inference = inference_method((x_input, output), model)

        reference, ref_ind = torch.min(output, 0)
        reference = reference.data.squeeze()[0]
        gp_hyper_params = inference.sampling(n_sample=10, n_burnin=0, n_thin=1)
        inferences = deepcopy_inference(inference, gp_hyper_params)

        x0_cand = optimization_candidates(x_input, output, -1, 1)
        x0, sample_info = optimization_init_points(x0_cand, reference=reference, inferences=inferences)
        next_x_point, pred_mean, pred_std, pred_var, pred_stdmax, pred_varmax = suggest(
            x0=x0, reference=reference, inferences=inferences, bounds=bnd, pool=pool)

        time_list.append(time.time())
        elapse_list.append(time_list[-1] - time_list[-2])
        pred_mean_list.append(pred_mean.squeeze()[0])
        pred_std_list.append(pred_std.squeeze()[0])
        pred_var_list.append(pred_var.squeeze()[0])
        pred_stdmax_list.append(pred_stdmax.squeeze()[0])
        pred_varmax_list.append(pred_varmax.squeeze()[0])
        reference_list.append(reference)
        refind_list.append(ref_ind.data.squeeze()[0] + 1)
        dist_to_ref_list.append(torch.sum((next_x_point - x_input[ref_ind]).data ** 2) ** 0.5)
        sample_info_list.append(sample_info)

        x_input = torch.cat([x_input, next_x_point], 0)
        output = torch.cat([output, func(x_input[-1]).resize(1, 1)])

        min_ind = torch.min(output, 0)[1]
        min_loc = x_input[min_ind]
        min_val = output[min_ind]
        dist_to_suggest = torch.sum((x_input - x_input[-1]).data ** 2, 1) ** 0.5
        dist_to_min = torch.sum((x_input - min_loc).data ** 2, 1) ** 0.5
        out_of_box = torch.sum((torch.abs(x_input.data) > 1), 1)
        print('')
        for i in range(x_input.size(0)):
            time_str = (time.strftime('%H:%M:%S', time.gmtime(time_list[i]))
                        + '(' + time.strftime('%H:%M:%S', time.gmtime(elapse_list[i])) + ') ')
            data_str = ('%3d-th : %+12.4f(R:%8.4f[%4d]/ref:[%3d]%8.4f), '
                        'sample([%2d] best:%2d/worst:%2d), '
                        'mean : %+.4E, std : %.4E(%5.4f), var : %.4E(%5.4f), '
                        '2ownMIN : %8.4f, 2curMIN : %8.4f, 2new : %8.4f'
                        % (i + 1, output.data.squeeze()[i],
                           torch.sum(x_input.data[i] ** 2) ** 0.5, out_of_box[i],
                           refind_list[i], reference_list[i],
                           sample_info_list[i][2], sample_info_list[i][0], sample_info_list[i][1],
                           pred_mean_list[i], pred_std_list[i],
                           pred_std_list[i] / pred_stdmax_list[i],
                           pred_var_list[i], pred_var_list[i] / pred_varmax_list[i],
                           dist_to_ref_list[i], dist_to_min[i], dist_to_suggest[i]))
            min_str = ' <========= MIN' if i == min_ind.data.squeeze()[0] else ''
            print(time_str + data_str + min_str)
            logfile.writelines(time_str + data_str + min_str + '\n')
        logfile.close()

        # Checkpoint the model and all bookkeeping after every evaluation.
        torch.save(model, model_filename)
        stored_variable = dict()
        for key in stored_variable_names:
            stored_variable[key] = locals()[key]
        f = open(data_config_filename, 'w')
        pickle.dump(stored_variable, f)
        f.close()

    if parallel:
        pool.close()

    print('Experiment based on data in %s' % os.path.split(model_filename)[0])
    return os.path.split(model_filename)[0]

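# Hedged usage sketch: the flag combinations mirror the asserts in BO() above
# (geometry='sphere' forbids ard; geometry='cube' forbids origin). `branin` is a
# stand-in objective with the `dim`/`func_name` attributes the function expects.
BO(geometry='cube', func=branin, ndim=2, ard=True)
BO(geometry='sphere', func=branin, ndim=2, origin=True, boundary=True, warping=True)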