def gen_data_to_s3(bucket, obj_func_min, num_pts, which_IS, key):
    """Sample random points, evaluate one information source and upload the result.

    Draws ``num_pts`` points uniformly at random from the box described by
    ``obj_func_min._search_domain``, evaluates information source ``which_IS``
    at each point and passes a dict with the keys ``"points"``, ``"vals"`` and
    ``"noise"`` to ``send_data_to_s3``.

    :param bucket: forwarded unchanged to ``send_data_to_s3``
    :param obj_func_min: objective exposing ``_search_domain``, ``evaluate`` and
        ``noise_and_cost_func``
    :param num_pts: number of random points to draw
    :param which_IS: index of the information source to evaluate
    :param key: forwarded unchanged to ``send_data_to_s3``
    """
    intervals = []
    for lower_upper in obj_func_min._search_domain:
        intervals.append(ClosedInterval(lower_upper[0], lower_upper[1]))
    sampler = pythonTensorProductDomain(intervals)

    sampled_points = sampler.generate_uniform_random_points_in_domain(num_pts)
    observed = [obj_func_min.evaluate(which_IS, x) for x in sampled_points]

    # Whatever noise_and_cost_func returns for this IS is multiplied by a
    # vector of ones of length num_pts.
    per_point_noise = obj_func_min.noise_and_cost_func(which_IS, None) * np.ones(num_pts)

    payload = {"points": sampled_points, "vals": observed, "noise": per_point_noise}
    send_data_to_s3(bucket, key, payload)
def gen_data_to_pickle(directory, obj_func_min, num_pts, which_IS, filename):
    """Sample random points, evaluate one information source and pickle the result.

    Draws ``num_pts`` points uniformly at random from the box described by
    ``obj_func_min._search_domain``, evaluates information source ``which_IS``
    at each point and pickles a dict with the keys ``"points"``, ``"vals"``
    and ``"noise"`` into the file named ``filename``.

    NOTE(review): ``directory`` is accepted but never used; the file is opened
    at ``filename`` as given. Presumably callers pass a full path -- confirm
    before relying on ``directory``.

    :param directory: unused (see note above)
    :param obj_func_min: objective exposing ``_search_domain``, ``evaluate`` and
        ``noise_and_cost_func``
    :param num_pts: number of random points to draw
    :param which_IS: index of the information source to evaluate
    :param filename: path of the pickle file to write (opened in binary mode)
    """
    intervals = []
    for lower_upper in obj_func_min._search_domain:
        intervals.append(ClosedInterval(lower_upper[0], lower_upper[1]))
    sampler = pythonTensorProductDomain(intervals)

    sampled_points = sampler.generate_uniform_random_points_in_domain(num_pts)
    observed = [obj_func_min.evaluate(which_IS, x) for x in sampled_points]
    per_point_noise = obj_func_min.noise_and_cost_func(which_IS, None) * np.ones(num_pts)

    payload = {"points": sampled_points, "vals": observed, "noise": per_point_noise}
    with open(filename, "wb") as handle:
        pickle.dump(payload, handle)
def coldstart_gen_data(obj_func_min, num_init_pts, num_replications, directory):
    """Create the initial data sets for the experiments, one pickle per replication.

    For every replication index ``r`` in ``range(num_replications)`` the file
    ``<directory>/<func name>_<num_init_pts>_points_each_repl_<r>.pickle`` is
    written. It holds a dict with ``"points"`` (uniform random points from the
    objective's search domain), ``"vals"`` (information source 0 evaluated at
    those points) and ``"noise"`` (first entry of ``noise_and_cost_func(0, None)``
    times a vector of ones of length ``num_init_pts``).

    :param obj_func_min: objective exposing ``getFuncName``, ``_search_domain``,
        ``evaluate`` and ``noise_and_cost_func``
    :param num_init_pts: number of points per replication
    :param num_replications: number of pickle files to create
    :param directory: directory prefix used when building the file name
    """
    name_template = "{0}/{1}_{2}_points_each_repl_{3}.pickle"
    for repl_idx in range(num_replications):
        target_path = name_template.format(
            directory, obj_func_min.getFuncName(), num_init_pts, repl_idx)

        box = pythonTensorProductDomain(
            [ClosedInterval(edge[0], edge[1]) for edge in obj_func_min._search_domain])
        draws = box.generate_uniform_random_points_in_domain(num_init_pts)
        observations = [obj_func_min.evaluate(0, x) for x in draws]

        noise_level = obj_func_min.noise_and_cost_func(0, None)[0]
        payload = {
            "points": draws,
            "vals": observations,
            "noise": noise_level * numpy.ones(num_init_pts),
        }
        with open(target_path, "wb") as out:
            pickle.dump(payload, out)
def find_best_mu_ei(gp, domain_bounds, num_multistart):
    """Minimize ``compute_mu(gp)`` over the domain with multistart BFGS.

    ``num_multistart`` start points are drawn uniformly at random from the box
    given by ``domain_bounds``; ``bfgs_optimization`` is run from each of them
    and the result with the smallest objective value is kept.

    :param gp: model handed to ``compute_mu`` to build the objective
    :param domain_bounds: iterable of ``(lower, upper)`` pairs, one per dimension
    :param num_multistart: number of random start points
    :return: tuple ``(min_mu, point)`` with the smallest value found and the
        point at which it was attained
    :raises ValueError: if no run returned a value below ``+inf`` (for example
        when ``num_multistart`` is 0 or every value is NaN); previously this
        surfaced as an ``UnboundLocalError`` on ``point``
    """
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(
        num_multistart)

    # The objective only depends on gp, so it is built once instead of once
    # per start point.
    mu_func = compute_mu(gp)

    min_mu = numpy.inf
    point = None
    for start_point in start_points:
        x, f = bfgs_optimization(start_point, mu_func, domain_bounds)
        if min_mu > f:
            min_mu = f
            point = x

    if point is None:
        raise ValueError(
            "find_best_mu_ei: none of the {0} start points produced a value "
            "below +inf".format(num_multistart))
    return min_mu, point
def optimize_hyperparameters(problem_search_domain, points_sampled, points_sampled_value,
                             points_sampled_noise_variance, upper_bound_noise_variances=10.,
                             consider_small_variances=True, hyper_prior=None,
                             num_restarts=32, num_jobs=16):
    '''
    Fit hyperparameters from data using MLE or MAP (described in Poloczek, Wang, and Frazier 2016)

    :param problem_search_domain: The search domain of the benchmark, as provided by the benchmark
    :param points_sampled: An array with the points sampled so far. Each point has the form [IS dim0 dim1 ... dimn]
    :param points_sampled_value: An array with the values observed at the points, in the same ordering
    :param points_sampled_noise_variance: forwarded unchanged to hyper_opt
    :param upper_bound_noise_variances: An upper bound on the search interval for the noise variance
        parameters (before squaring)
    :param consider_small_variances: If true, every other BFGS starting point has its last entry set to 0.1
    :param hyper_prior: use prior for MAP estimate if supplied, and do MLE otherwise
    :param num_restarts: number of starting points for BFGS to find MLE/MAP
    :param num_jobs: number of parallelized BFGS instances
    :return: An array with the best found values for the hyperparameters
    '''
    use_approx_grad = True

    # pick huge upper bounds: at least 10, or the variance of the observations
    signal_variance_cap = numpy.maximum(10., numpy.var(points_sampled_value))
    hyper_bounds = generate_hyperbounds(problem_search_domain, upper_bound_noise_variances,
                                        signal_variance_cap)

    intervals = [ClosedInterval(bd[0], bd[1]) for bd in hyper_bounds]
    hyper_domain = pythonTensorProductDomain(intervals)
    starts = hyper_domain.generate_uniform_random_points_in_domain(num_restarts)

    # Make sure that small variances are tried despite multi-modality; this
    # seems softer than using a MAP estimate. Only even-numbered starting
    # points are touched.
    if consider_small_variances:
        for row in range(0, num_restarts, 2):
            candidate = starts[row]
            candidate[-1] = 0.1  # small starting value for the noise parameter in BFGS
            starts[row] = candidate

    runner = Parallel(n_jobs=num_jobs)
    outcomes = runner(
        delayed(hyper_opt)(points_sampled, points_sampled_value, points_sampled_noise_variance,
                           start, hyper_bounds, use_approx_grad, hyper_prior)
        for start in starts)

    # The log marginal likelihood was negated before being passed to BFGS, so
    # the smallest second entry marks the best fit.
    winner = min(outcomes, key=itemgetter(1))
    return winner[0]
def check_ave_min(func_idx, num_repl=500):
    """Estimate the average minimum found by pure random search on one benchmark.

    For every sample size in the module-level ``num_pts_list``, ``num_repl``
    independent batches of that many uniform random points are drawn from the
    search domain of ``func_list[func_idx]``; information source 0 is evaluated
    on each batch and the smallest value is recorded.

    :param func_idx: index into the module-level ``func_list``
    :param num_repl: number of replications per sample size (defaults to 500,
        the value that used to be hard-coded)
    :return: list with one entry per element of ``num_pts_list`` holding the
        minimum value averaged over the replications (NaN if ``num_repl`` is 0)
    """
    func = func_list[func_idx]
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in func._search_domain])

    # Rows are replications, columns follow the order of num_pts_list.
    min_vals = np.zeros((num_repl, len(num_pts_list)))
    for i, num_pts in enumerate(num_pts_list):
        for repl in range(num_repl):
            points = search_domain.generate_uniform_random_points_in_domain(
                num_pts)
            min_vals[repl, i] = np.amin([func.evaluate(0, pt) for pt in points])
    return np.mean(min_vals, axis=0).tolist()
def coldstart_gen_hyperdata(primary_obj_func_min, list_other_obj_func_min, num_pts, directory):
    """Create the data set used for hyperparameter optimization and pickle it.

    ``num_pts`` uniform random points are drawn from the primary objective's
    search domain. Information source 0 of the primary objective and of every
    objective in ``list_other_obj_func_min`` is evaluated at these same points.
    The pickle holds a dict with ``"points"``, ``"vals"`` (one list of values
    per objective, primary first) and ``"noise"`` (one
    ``noise_and_cost_func(0, None)`` result per objective, in the same order).

    The file is named
    ``<directory>/hyper_<num_pts>_points_<primary name>_<other names joined by "_">.pickle``.

    :param primary_obj_func_min: objective whose search domain is sampled
    :param list_other_obj_func_min: further objectives evaluated at the same points
    :param num_pts: number of random points
    :param directory: directory prefix used when building the file name
    """
    primary_name = primary_obj_func_min.getFuncName()
    other_names = "_".join([other.getFuncName() for other in list_other_obj_func_min])
    target_path = "{0}/hyper_{1}_points_{2}_{3}.pickle".format(
        directory, num_pts, primary_name, other_names)

    box = pythonTensorProductDomain(
        [ClosedInterval(edge[0], edge[1]) for edge in primary_obj_func_min._search_domain])
    draws = box.generate_uniform_random_points_in_domain(num_pts)

    # Entry 0 belongs to the primary objective; the others follow in list order.
    all_vals = [[primary_obj_func_min.evaluate(0, x) for x in draws]]
    all_noise = [primary_obj_func_min.noise_and_cost_func(0, None)]
    for other in list_other_obj_func_min:
        all_vals.append([other.evaluate(0, x) for x in draws])
        all_noise.append(other.noise_and_cost_func(0, None))

    payload = {"points": draws, "vals": all_vals, "noise": all_noise}
    with open(target_path, "wb") as out:
        pickle.dump(payload, out)
def test_normal_prior(self):
    """Run hyper_opt under three normal priors and print the fitted hyperparameters.

    Random GP data is generated for a 2-dimensional space with 2 information
    sources. Starting from one random point, ``hyper_opt`` is run with a broad
    prior, a tight prior centred on the true hyperparameters and a tight prior
    centred on ones. The results are printed for inspection; nothing is asserted.
    """
    space_dim = 2
    num_IS = 2
    true_hyper, data = get_random_gp_data(space_dim, num_IS, 500)
    n_hyper = len(true_hyper)

    # The start point is drawn from [0.01, 2] per hyperparameter, while the
    # optimizer itself may move within [0.01, 100].
    start_box = numpy.repeat([[0.01, 2.]], n_hyper, axis=0)
    hyperparam_search_domain = pythonTensorProductDomain(
        [ClosedInterval(edge[0], edge[1]) for edge in start_box])
    hyper_bounds = [(0.01, 100.)] * n_hyper
    multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(1)
    start = multistart_pts[0, :]

    cov = MixedSquareExponential(hyperparameters=start, total_dim=space_dim + 1, num_is=num_IS)

    test_prior = NormalPrior(5. * numpy.ones(n_hyper), 25. * numpy.eye(n_hyper))
    hyper_test, f, output = hyper_opt(cov, data=data, init_hyper=start,
                                      hyper_bounds=hyper_bounds, approx_grad=False,
                                      hyper_prior=test_prior)

    good_prior = NormalPrior(true_hyper, 0.1 * numpy.eye(n_hyper))
    hyper_good_prior, _, _ = hyper_opt(cov, data=data, init_hyper=start,
                                       hyper_bounds=hyper_bounds, approx_grad=False,
                                       hyper_prior=good_prior)

    bad_prior = NormalPrior(numpy.ones(n_hyper), 0.1 * numpy.eye(n_hyper))
    hyper_bad_prior, _, _ = hyper_opt(cov, data=data, init_hyper=start,
                                      hyper_bounds=hyper_bounds, approx_grad=False,
                                      hyper_prior=bad_prior)

    report = "true hyper: {0}\n hyper test: {1}\n good prior: {2}\n bad prior:\n should close to one {3}"
    print(report.format(true_hyper, hyper_test, hyper_good_prior, hyper_bad_prior))
    print("dim {0}, num_is {1}".format(space_dim, num_IS))
def global_optimization_of_GP(gp_model, bounds, num_multistart, minimization=True):
    """Optimize the GP posterior mean of information source 0 with multistart L-BFGS-B.

    Every candidate ``x`` is prefixed with a 0 (IS0 is assumed to be the truth
    IS) before it is handed to ``gp_model``. For maximization the mean and its
    gradient are negated so that the minimizer can be used unchanged.

    :param gp_model: model exposing ``compute_mean_of_points`` and
        ``compute_grad_mean_of_points``
    :param bounds: list of (min, max) tuples, one per spatial dimension
    :param num_multistart: number of uniform random start points
    :param minimization: minimize the mean if True, maximize it otherwise
    :return: array of shape (1, space_dim + 1): the best x found, with a
        leading 0 for the information source
    :raises ValueError: if no run returned a value below ``+inf`` (for example
        when ``num_multistart`` is 0 or every value is NaN); previously this
        surfaced as an ``UnboundLocalError`` on the result
    """
    sgn = 1 if minimization else -1

    def _as_is0_row(x):
        # Prepend the IS index 0 and shape the point as a single-row matrix.
        return np.concatenate([[0], x]).reshape((1, -1))

    def fcn(x):
        return gp_model.compute_mean_of_points(_as_is0_row(x))[0] * sgn

    def grad(x):
        # Column 0 is the derivative w.r.t. the IS index and is dropped.
        return gp_model.compute_grad_mean_of_points(
            _as_is0_row(x), num_derivatives=1)[0, 1:] * sgn

    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(
        num_multistart)

    min_fcn = np.inf
    ret = None
    for start_pt in start_points:
        result_x, result_f, output = scipy.optimize.fmin_l_bfgs_b(
            func=fcn, x0=start_pt, fprime=grad, args=(), approx_grad=False,
            bounds=bounds, m=10, factr=10.0, pgtol=1e-10, epsilon=1e-08,
            iprint=-1, maxfun=15000, maxiter=200, disp=0, callback=None)
        if result_f < min_fcn:
            min_fcn = result_f
            ret = result_x

    if ret is None:
        raise ValueError(
            "global_optimization_of_GP: none of the {0} start points produced "
            "a value below +inf".format(num_multistart))

    # Note: for maximization this prints the negated mean.
    print("found GP min {0}".format(min_fcn))
    return np.concatenate([[0], ret]).reshape((1, -1))
def optimize_with_ego(gp, domain_bounds, num_multistart):
    """Maximize expected improvement over the domain with multistart BFGS.

    ``num_multistart`` start points are drawn uniformly at random from the box
    given by ``domain_bounds``. ``bfgs_optimization`` minimizes the negated
    expected improvement from each of them and the best result is kept.

    :param gp: model handed to ``ExpectedImprovement``
    :param domain_bounds: iterable of ``(lower, upper)`` pairs, one per dimension
    :param num_multistart: number of random start points
    :return: tuple ``(point_to_sample, expected_improvement)``; the second
        entry is the (positive-sign) expected improvement at the returned point
    :raises ValueError: if no run returned a value below ``+inf`` (for example
        when ``num_multistart`` is 0 or every value is NaN); previously this
        surfaced as an ``UnboundLocalError`` on ``point_to_sample``
    """
    expected_improvement_evaluator = ExpectedImprovement(gp)
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(
        num_multistart)

    def negative_ego_func(x):
        # The evaluator is stateful: set the query point first, then read EI.
        expected_improvement_evaluator.set_current_point(x.reshape((1, -1)))
        return -1.0 * expected_improvement_evaluator.compute_expected_improvement()

    min_negative_ei = numpy.inf
    point_to_sample = None
    for start_point in start_points:
        x, f = bfgs_optimization(start_point, negative_ego_func, domain_bounds)
        if min_negative_ei > f:
            min_negative_ei = f
            point_to_sample = x

    if point_to_sample is None:
        raise ValueError(
            "optimize_with_ego: none of the {0} start points produced a value "
            "below +inf".format(num_multistart))
    return point_to_sample, -min_negative_ei
def optimize_entropy(pes, pes_model, space_dim, num_discretization, cost_func, list_sample_is, bounds=None):
    """Pick the point and information source with the largest acquisition value per unit cost.

    ``num_discretization`` points are drawn uniformly at random from ``bounds``.
    For every information source in ``list_sample_is`` the acquisition function
    is evaluated at all points (with the IS index prepended as first column)
    and divided by ``cost_func(which_is, None)``. The best cost-normalized
    value over all sources and points wins.

    :param pes: object exposing ``acquisition``
    :param pes_model: model passed to ``pes.acquisition`` under the key ``'obj'``
    :param space_dim: number of spatial dimensions, used for the default bounds
    :param num_discretization: number of random candidate points
    :param cost_func: callable ``cost_func(which_is, None)`` giving the query cost
    :param list_sample_is: information sources to consider
    :param bounds: list of (min, max) tuples; defaults to the unit box when
        ``None`` or empty
    :return: tuple ``(point, best_is, best_acq, raw_acq)`` where ``raw_acq``
        holds, per information source, its best cost-normalized value
        multiplied back by the cost (kept for tuning costs)
    :raises ValueError: if no candidate had an acquisition value above
        ``-inf`` (for example when ``list_sample_is`` is empty); previously
        this surfaced as an ``UnboundLocalError``
    """
    # "not bounds" is ambiguous for array-like bounds, so test explicitly.
    if bounds is None or len(bounds) == 0:
        bounds = [(0., 1.)] * space_dim

    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in bounds])
    points = search_domain.generate_uniform_random_points_in_domain(
        num_discretization)

    raw_acq = []  # for tuning costs
    best_acq = -np.inf
    best_is = None
    best_idx = None
    for which_is in list_sample_is:
        candidates = np.hstack((np.ones((num_discretization, 1)) * which_is, points))
        acq_values = pes.acquisition({'obj': pes_model}, {}, candidates,
                                     current_best=None, compute_grad=False)
        # Look the cost up once per information source and reuse it below.
        cost = cost_func(which_is, None)
        acq_list = acq_values / cost
        inner_best_idx = np.argmax(acq_list)
        raw_acq.append(acq_list[inner_best_idx] * cost)
        if acq_list[inner_best_idx] > best_acq:
            best_acq = acq_list[inner_best_idx]
            best_is = which_is
            best_idx = inner_best_idx

    if best_idx is None:
        raise ValueError(
            "optimize_entropy: no candidate with an acquisition value above "
            "-inf was found")
    return points[best_idx, :], best_is, best_acq, raw_acq
def optimize_with_multifidelity_ei(gp_list, domain_bounds, num_IS, num_multistart, noise_and_cost_func):
    """Maximize multifidelity expected improvement with multistart BFGS.

    ``num_multistart`` start points are drawn uniformly at random from the box
    given by ``domain_bounds``. ``bfgs_optimization`` minimizes the negated
    multifidelity expected improvement from each of them; the best point is
    kept and the evaluator's ``choose_IS`` selects the source to query there.

    :param gp_list: models handed to ``MultifidelityExpectedImprovement``
    :param domain_bounds: iterable of ``(lower, upper)`` pairs, one per dimension
    :param num_IS: unused in this function; kept for interface compatibility
    :param num_multistart: number of random start points
    :param noise_and_cost_func: handed to ``MultifidelityExpectedImprovement``
    :return: tuple ``(point_to_sample, chosen_IS, expected_improvement)``
    :raises ValueError: if no run returned a value below ``+inf`` (for example
        when ``num_multistart`` is 0 or every value is NaN); previously this
        surfaced as an ``UnboundLocalError`` on ``point_to_sample``
    """
    multifidelity_expected_improvement_evaluator = MultifidelityExpectedImprovement(
        gp_list, noise_and_cost_func)
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(
        num_multistart)

    def negative_ei_func(x):
        return -1.0 * multifidelity_expected_improvement_evaluator.compute_expected_improvement(x)

    min_negative_ei = numpy.inf
    point_to_sample = None
    for start_point in start_points:
        x, f = bfgs_optimization(start_point, negative_ei_func, domain_bounds)
        if min_negative_ei > f:
            min_negative_ei = f
            point_to_sample = x

    if point_to_sample is None:
        raise ValueError(
            "optimize_with_multifidelity_ei: none of the {0} start points "
            "produced a value below +inf".format(num_multistart))

    chosen_is = multifidelity_expected_improvement_evaluator.choose_IS(point_to_sample)
    return point_to_sample, chosen_is, -min_negative_ei
def get_random_gp_data(space_dim, num_is, num_data_each_is, kernel_name):
    """
    Generate random gp data

    Hyperparameters are drawn uniformly from [0, 1), a covariance of the
    requested kind is built from them, and samples are drawn from the
    resulting GP on the box [-10, 10]^(space_dim + 1). Column 0 of every point
    holds the information source index.

    :param space_dim: number of spatial dimensions (points have space_dim + 1 entries)
    :param num_is: number of information sources to sample at every point
        (indices 0 .. num_is - 1)
    :param num_data_each_is: number of random points; each is sampled once per
        information source
    :param kernel_name: currently it's either 'mix_exp' or 'prod_ker'
    :return: tuple ``(hyper_params, data)`` with the drawn hyperparameters and
        the HistoricalData object holding all samples
    :raises NotImplementedError: for any other ``kernel_name``
    """
    sample_var = 0.01
    if kernel_name == "mix_exp":
        hyper_params = numpy.random.uniform(size=(num_is + 1) * (space_dim + 1))
        cov = MixedSquareExponential(hyper_params, space_dim + 1, num_is)
    elif kernel_name == "prod_ker":
        # Floor division keeps the size an int on Python 3 as well. The product
        # of two consecutive integers is even, so nothing is rounded away.
        hyper_params = numpy.random.uniform(
            size=(num_is + 1) * (num_is + 2) // 2 + space_dim + 1)
        cov = ProductKernel(hyper_params, space_dim + 1, num_is + 1)
    else:
        raise NotImplementedError("invalid kernel")

    python_search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in numpy.repeat([[-10., 10.]], space_dim + 1, axis=0)
    ])

    # Seed the data set with two zero-valued observations at IS 0.
    data = HistoricalData(space_dim + 1)
    init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
    init_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(init_pts, numpy.zeros(2), numpy.ones(2) * sample_var)
    gp = GaussianProcess(cov, data)

    points = python_search_domain.generate_uniform_random_points_in_domain(
        num_data_each_is)
    for pt in points:
        for i in range(num_is):
            pt[0] = i
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([
                [pt, val, sample_var],
            ])
            # Rebuild the GP so that the next draw uses a model whose data
            # already contains the sample just added.
            gp = GaussianProcess(cov, data)
    return hyper_params, data
def generate_data(self, num_data):
    """Draw ``num_data`` samples from the GP defined by ``self._cov``.

    Points are drawn uniformly at random from ``self._info_dict['search_domain']``.
    Entry 0 of every point is overwritten with ``ceil(u)`` for ``u`` uniform on
    [0, 2); points with entry 1 are sampled with variance ``self._sample_var_1``,
    all others with ``self._sample_var_2``.

    :param num_data: number of samples to generate
    :return: HistoricalData object with the two zero-valued seed observations
        followed by the ``num_data`` samples
    """
    python_search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in self._info_dict['search_domain']
    ])

    # Seed the data set with two zero-valued observations whose entry 0 is 0.
    data = HistoricalData(self._info_dict['dim'])
    init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
    init_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(init_pts, numpy.zeros(2),
                                numpy.ones(2) * self._sample_var_1)
    gp = GaussianProcess(self._cov, data)

    points = python_search_domain.generate_uniform_random_points_in_domain(
        num_data)
    for pt in points:
        # Draw a scalar rather than a size-1 array: assigning a 1-element
        # array into a scalar slot relies on array-to-scalar conversion,
        # which recent NumPy releases deprecate.
        pt[0] = numpy.ceil(numpy.random.uniform(high=2.0))
        sample_var = self._sample_var_1 if pt[0] == 1 else self._sample_var_2
        val = gp.sample_point_from_gp(pt, sample_var)
        data.append_sample_points([
            [pt, val, sample_var],
        ])
        # Rebuild the GP so that the next draw uses a model whose data
        # already contains the sample just added.
        gp = GaussianProcess(self._cov, data)
    return data
"StybTang": StybTang( act_var, low_dim, high_to_low, sign, bx_size, noise_var=noise_var ), "MNIST": MNIST(act_var, low_dim, high_to_low, sign, bx_size), } objective_func = obj_func_dict[obj_func_name] dim = int(objective_func._dim) num_initial_points = initial_n num_fidelity = objective_func._num_fidelity inner_search_domain = pythonTensorProductDomain( [ ClosedInterval( objective_func._search_domain[i, 0], objective_func._search_domain[i, 1] ) for i in range(objective_func._search_domain.shape[0] - num_fidelity) ] ) cpp_search_domain = cppTensorProductDomain( [ClosedInterval(bound[0], bound[1]) for bound in objective_func._search_domain] ) cpp_inner_search_domain = cppTensorProductDomain( [ ClosedInterval( objective_func._search_domain[i, 0], objective_func._search_domain[i, 1] ) for i in range(objective_func._search_domain.shape[0] - num_fidelity) ] )
obj_func_max = Rosenbrock(numIS, mult=-1.0) # used by KG obj_func_min = Rosenbrock( numIS, mult=1.0) # our original problems are all assumed to be minimization! # less important params exploitation_threshold = 1e-5 num_x_prime = 3000 num_discretization_before_ranking = num_x_prime * 3 num_iterations = 100 num_threads = 64 num_multistart = 64 num_candidate_start_points = 500 ### end parameter search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in obj_func_max._search_domain ]) noise_and_cost_func = obj_func_min.noise_and_cost_func # Load initial data from pickle init_pts = load_init_points_for_all_IS("pickles", init_data_pickle_filename, obj_func_min._numIS) init_vals = load_vals("pickles", init_data_pickle_filename, obj_func_min._numIS) #init_pts, init_vals = sample_initial_points.load_data_from_a_min_problem("pickles", init_data_pickle_filename) # setup benchmark result container multi_kg_result = BenchmarkResult(num_iterations, obj_func_max._dim, benchmark_result_table_name) kg_hyper_param = pandas.read_sql_table( 'multifidelity_kg_hyperparam_' + func_name,
data_list, bias_sq_list = createHistoricalDataForMisoEI(obj_func_min.getDim(), listPrevData, directory=pathToPickles, bias_filename=bias_filename) ############################################### ############################################### ### Begin hyper opt hyper_result = [] for data in data_list: # Setup prior for MAP prior_mean = np.concatenate(([np.var(data.points_sampled_value)], [1.]*obj_func_min.getDim())) prior_sig = np.eye(obj_func_min.getDim()+1) * 100. prior_sig[0,0] = np.power(prior_mean[0]/5., 2.) prior = NormalPrior(prior_mean, prior_sig) hyper_bounds = [(0.1, prior_mean[i]+2.*np.sqrt(prior_sig[i,i])) for i in range(obj_func_min.getDim()+1)] print "hyper bound {0}".format(hyper_bounds) hyperparam_search_domain = pythonTensorProductDomain([ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds]) multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(num_hyper_multistart) best_f = np.inf cov = SquareExponential(prior_mean) for i in range(num_hyper_multistart): hyper, f, output = hyper_opt(cov, data=data, init_hyper=multistart_pts[i, :], hyper_bounds=hyper_bounds, approx_grad=False, hyper_prior=prior) # print output if f < best_f: best_hyper = hyper best_f = f print 'best_hyper=' + str(best_hyper) print 'best_f= ' + str(best_f) print "prior mean is: {0}".format(prior_mean) hyper_result = np.concatenate((hyper_result, best_hyper)) sql_util.write_array_to_table("mei_hyper_{0}".format(obj_func_min.getFuncName()), hyper_result)
def miso_gen_data():
    """
    Generate initial data sets for MISO runs and pickle one file per replication.

    This script intends to do the same thing as sample_initial_points.py, with the only difference that it
    uses AssembleToOrderPES as objective, which places the truth IS at IS0. This is required by the Entropy
    Search algorithm and also makes sense when the truth IS is accessible.

    For every replication, and for every IS returned by getList_IS_to_query(), num_init_pts_each_IS uniform
    random points are drawn from the objective's search domain and evaluated through a joblib Parallel pool.
    The pickle holds a dict with the keys "points", "vals" and "IS", each a list with one entry per queried IS.

    All settings (objective, number of points, number of replications, output directory) are hard-coded below.
    """
    ### Need to set the following parameters!
    # Alternative objectives used previously:
    # obj_func_min = RosenbrockShifted( )
    obj_func_min = AssembleToOrderPES(mult=-1.0)
    # obj_func_min = RosenbrockNoiseFreePES(mult=1.0)
    # obj_func_min = RosenbrockNewNoiseFreePES(mult=1.0)

    # list of IS that are to be queried
    # (earlier settings: [1,2,3]; [0] for coldstart; range(obj_func_min._num_IS))
    list_IS_to_query = obj_func_min.getList_IS_to_query()

    # create initial data for runs
    num_init_pts_each_IS = 20  # earlier notes: 5 for Rosenbrock, 20 for ATO
    num_replications = 100

    # Settings used to create data for hyper opt. instead:
    # num_init_pts_each_IS = 200
    # num_replications = 3

    allows_parallelization = True  # set to True if each simulator/IS can be queried multiple times simultaneously
    # is True for rosenbrock and ATO
    # is False for dragAndLift
    ### end

    directory = "/fs/europa/g_pf/pickles/miso"
    for replication_no in range(num_replications):
        # File name encodes the objective, the queried IS and the number of points per IS.
        filename = obj_func_min.getFuncName() + '_' + 'IS_' + '_'.join(str(element) for element in list_IS_to_query) \
                   + '_' + str(num_init_pts_each_IS) + "_points_each"
        if num_replications > 1:
            filename += '_repl_' + str(replication_no)
        print 'filename=' + filename

        search_domain = pythonTensorProductDomain([ClosedInterval(bound[0], bound[1]) for bound in obj_func_min._search_domain])

        # One entry per queried IS, in the order of list_IS_to_query.
        init_points_for_all_IS = []
        init_vals_all_IS = []
        is_list = []

        def parallel_func(IS, pt):
            # Thin wrapper so that the evaluation can be dispatched via joblib's delayed().
            return obj_func_min.evaluate(IS, pt)

        num_parallel_jobs = num_init_pts_each_IS  # Jialei's original choice
        if(('ato' in obj_func_min.getFuncName()) and (num_parallel_jobs > 10)):
            # do not start too many MATLAB instances
            num_parallel_jobs = 10
        if(not allows_parallelization):
            num_parallel_jobs = 1

        index_Array = 0  # which entry of the array to write into?
        with Parallel(n_jobs=num_parallel_jobs) as parallel:
            for index_IS in list_IS_to_query:
                print "{0}th IS".format(index_IS)
                points = search_domain.generate_uniform_random_points_in_domain(num_init_pts_each_IS)
                init_points_for_all_IS.append(points)
                vals = parallel(delayed(parallel_func)(index_IS, pt) for pt in init_points_for_all_IS[index_Array])
                init_vals_all_IS.append(vals)
                # Vector of length num_init_pts_each_IS filled with the IS index.
                is_list.append(numpy.ones(num_init_pts_each_IS)*index_IS)
                index_Array +=1
        # NOTE(review): the original indentation of this block was lost; this print is assumed to run once
        # per replication after all IS have been queried -- confirm against the original file.
        print "min value: {0}".format(numpy.amin(init_vals_all_IS))

        data = {"points": init_points_for_all_IS, "vals": init_vals_all_IS, "IS": is_list}
        with open("{0}/{1}.pickle".format(directory, filename), "wb") as file:
            pickle.dump(data, file)
based on Jialei's Rosenbrock code -- Many Thanks! ''' # the next lines are dependent on the problem func_name = 'assembleToOrder' obj_func_max = AssembleToOrder(numIS=4) num_pts_to_gen = 100 # numpy.repeat( 250, obj_func_max.getNumIS()) hyper_bounds = [ (0.01, 100) for i in range((obj_func_max.getDim() + 1) * (obj_func_max.getNumIS() + 1)) ] num_hyper_multistart = 5 search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in obj_func_max.getSearchDomain() ]) ### Gen points for hyperparam estimation data = HistoricalData(obj_func_max.getDim() + 1) # should go into the objective func obj for i in range(obj_func_max.getNumIS()): pts = search_domain.generate_uniform_random_points_in_domain( num_pts_to_gen) vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts] IS_pts = numpy.hstack(((i + 1) * numpy.ones(num_pts_to_gen).reshape( (-1, 1)), pts)) sample_vars = [ obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts ] data.append_historical_data(IS_pts, vals, sample_vars)
def obtainHistoricalDataForEGO(load_historical_data_from_pickle, obj_func_min, directoryToPickles,
                               list_IS_to_query, num_init_pts_each_IS, init_data_pickle_filename=''):
    '''
    Create Historical Data object for EGO that contains initial data.
    If truthIS is among the IS, then only the data of truthIS is used.

    Args:
        load_historical_data_from_pickle: if True load the data from a pickle, otherwise evaluate the
            objective at uniformly random points of its search domain
        obj_func_min: the problem
        directoryToPickles: path to the directory that contains the pickle files
        list_IS_to_query: list of the IS that should be queried, e.g. [0, 1, 2]
        num_init_pts_each_IS: how many points for each IS - is either used to find the right pickle or to
            determine the number of points to sample
        init_data_pickle_filename: optional parameter that gives the filename of the pickle to load; if
            empty, the name is derived from the function name, the IS list and the number of points

    Returns: HistoricalData object
    '''
    historical_data = HistoricalData(obj_func_min._dim)

    # Look the truth IS up once; the answers do not change inside this function.
    truth_is = obj_func_min.getTruthIS()
    truth_is_queried = truth_is in list_IS_to_query

    if load_historical_data_from_pickle:
        if init_data_pickle_filename == '':
            init_data_pickle_filename = obj_func_min.getFuncName() + '_' + 'IS_' \
                + '_'.join(str(element) for element in list_IS_to_query) + '_' \
                + str(num_init_pts_each_IS) + "_points_each"
        init_pts_array, init_vals_array = load_data_from_a_min_problem(
            directoryToPickles, init_data_pickle_filename)

        # The arrays in the pickle are ordered like list_IS_to_query. If the
        # truth IS is among the sampled ones, load only that one; otherwise
        # load the data of all IS.
        if truth_is_queried:
            selected = [(list_IS_to_query.index(truth_is), truth_is)]
        else:
            selected = list(enumerate(list_IS_to_query))

        for index_array, index_IS in selected:
            sample_vars = [
                obj_func_min.noise_and_cost_func(index_IS, pt)[0]
                for pt in init_pts_array[index_array]
            ]
            historical_data.append_historical_data(
                init_pts_array[index_array], init_vals_array[index_array], sample_vars)
    else:
        # Generate initial data by querying random points for each IS. The
        # domain only depends on the objective, so it is built once.
        search_domain = pythonTensorProductDomain([
            ClosedInterval(bound[0], bound[1])
            for bound in obj_func_min._search_domain
        ])
        for index_IS in list_IS_to_query:
            if truth_is_queried and index_IS != truth_is:
                continue  # the truthIS is observed but this is another IS: skip!
            pts = search_domain.generate_uniform_random_points_in_domain(
                num_init_pts_each_IS)
            vals = [obj_func_min.evaluate(index_IS, pt) for pt in pts]
            sample_vars = [
                obj_func_min.noise_and_cost_func(index_IS, pt)[0] for pt in pts
            ]
            historical_data.append_historical_data(pts, vals, sample_vars)
    return historical_data
pickle_vals(directory, func_name, obj_func_min.getNumIS(), vals) if __name__ == "__main__": ### Need to set the following parameters! obj_func_min = DragAndLift(mult=1.0) directory = "pickles" #num_init_pts_each_IS = 10 allows_parallelization = False # set to True if each simulator/IS can be queried multiple times simultaneously # is True for rosenbrock and ATO # is False for dragAndLift ### end # specific to each scenario search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in obj_func_min.getSearchDomain() ]) # this file is used below again and hence should be made available there, too lastExistingSetId = 1 # prevent existing datasets from being overwritten for num_init_pts_each_IS in [10, 10]: # [5, 10, 10, 5, 5]: init_points_for_all_IS = [] # IS 1 and 2 at the same points points = search_domain.generate_uniform_random_points_in_domain( num_init_pts_each_IS) init_points_for_all_IS.append(points) init_points_for_all_IS.append(points) # IS 3 and 4 at the same points points = search_domain.generate_uniform_random_points_in_domain(
allows_parallelization = True # set to True if each simulator/IS can be queried multiple times simultaneously # is True for rosenbrock and ATO # is False for dragAndLift ### end directory = "../pickles/csCentered" for replication_no in range(num_replications): filename = obj_func_min.getFuncName() + '_' + 'IS_' + '_'.join(str(element) for element in list_IS_to_query) \ + '_' + str(num_init_pts_each_IS) + "_points_each" if num_replications > 1: filename += '_repl_' + str(replication_no) print 'filename=' + filename search_domain = pythonTensorProductDomain( [ ClosedInterval(bound[0], bound[1]) for bound in obj_func_min._search_domain ] ) # this file is used below again and hence should be made available there, too init_points_for_all_IS = [] init_vals_all_IS = [] def parallel_func(IS, pt): return obj_func_min.evaluate(IS, pt) num_parallel_jobs = num_init_pts_each_IS # Jialei's original choice if (('ato' in obj_func_min.getFuncName()) and (num_parallel_jobs > 10)): # do not start too many MATLAB instances num_parallel_jobs = 10 if (not allows_parallelization): num_parallel_jobs = 1
hist_data_grad.append_historical_data(gp_grad_info_dict['points'], gp_grad_info_dict['values'], gp_grad_info_dict['vars']) print gp_grad_info_dict['values'] objective_func = synthetic_functions.RandomGP( gp_grad_info_dict['dim'], gp_grad_info_dict['hyper_params'], hist_data_grad) hyper_params = gp_grad_info_dict['hyper_params'] init_pts = [[-2.0], [0.0], [0.3], [0.5]] ymax = 1 else: objective_func = obj_func_dict[obj_func_name] #init_data = utils.get_init_data_from_db(objective_func._dim, objective_func._sample_var, utils.sql_engine, 'init_points_'+obj_func_name) python_search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in objective_func._search_domain ]) cpp_search_domain = cppTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in objective_func._search_domain ]) result = numpy.zeros((num_iteration, 6)) best_so_far_kg = numpy.zeros((end_idx - start_idx, num_iteration + 1)) # begin job for job_no in xrange(start_idx, end_idx): python_search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in objective_func._search_domain ])
def optimize_hyperparameters(num_IS, problem_search_domain, points_sampled, points_sampled_value,
                             upper_bound_noise_variances=10., consider_small_variances=True,
                             hyper_prior=None, num_restarts=32, num_jobs=16):
    '''
    Fit hyperparameters from data using MLE or MAP (described in Poloczek, Wang, and Frazier 2016)

    :param num_IS: The total number of information sources
    :param problem_search_domain: The search domain of the benchmark, as provided by the benchmark
    :param points_sampled: An array with the points sampled so far. Each point has the form [IS dim0 dim1 ... dimn]
    :param points_sampled_value: An array with the values observed at the points, in the same ordering
    :param upper_bound_noise_variances: An upper bound on the search interval for the noise variance
        parameters (before squaring)
    :param consider_small_variances: If true, every other BFGS starting point has its last num_IS entries
        (the noise parameters) set to 0.1
    :param hyper_prior: use prior for MAP estimate if supplied, and do MLE otherwise
    :param num_restarts: number of starting points for BFGS to find MLE/MAP
    :param num_jobs: number of parallelized BFGS instances
    :return: An array with the best found values for the hyperparameters
    '''
    # Debugging notes kept from earlier revisions: hyperparameters found on a
    # large dataset with known noise were
    #   RbRemi: [6.99174646e+05, 7.26756985e-01, 3.04331525, 1.20070203,
    #            1.65571854e-01, 3.28218161e-01, 1e-1, 1e-1]
    #   RbNew:  [6.89212443e+05, 7.06559876e-01, 2.98432914e+00, 2.05984746e+00,
    #            1.16904675e-01, 2.23726117e-01, 1.0, 1e-1]
    use_approx_grad = True

    # pick huge upper bounds: at least 10, or the variance of the observations
    signal_variance_cap = numpy.maximum(10., numpy.var(points_sampled_value))
    hyper_bounds = generate_hyperbounds(num_IS, problem_search_domain,
                                        upper_bound_noise_variances, signal_variance_cap)

    intervals = [ClosedInterval(bd[0], bd[1]) for bd in hyper_bounds]
    hyper_domain = pythonTensorProductDomain(intervals)
    starts = hyper_domain.generate_uniform_random_points_in_domain(num_restarts)

    # 1 + the dimension of the search space that the points are from
    dim = len(problem_search_domain) + 1

    # Make sure that small variances are tried despite multi-modality; this
    # seems softer than using a MAP estimate. Only even-numbered starting
    # points are touched.
    if consider_small_variances:
        for row in range(0, num_restarts, 2):
            candidate = starts[row]
            for offset in range(num_IS):
                # small starting value for the noise parameters in BFGS
                candidate[-1 - offset] = 0.1
            starts[row] = candidate

    # All starting points are optimized in parallel.
    runner = Parallel(n_jobs=num_jobs)
    outcomes = runner(
        delayed(hyper_opt)(num_IS, dim, points_sampled, points_sampled_value, start,
                           hyper_bounds, use_approx_grad, hyper_prior)
        for start in starts)

    # The log marginal likelihood was negated before being passed to BFGS, so
    # the smallest second entry marks the best fit.
    winner = min(outcomes, key=itemgetter(1))
    return winner[0]