def sample_intial_x_general(problem, num_initial_pts_per_s, points_x, exp_path, result_path):
    """Evaluate the objective at ``points_x`` for every grid value of ``s`` and persist the samples.

    ``s`` runs over an evenly spaced grid spanning the first row of the search
    domain. For each ``s`` the rows of ``points_x`` are prefixed with ``s``,
    evaluated with a freshly drawn random seed, and the returned value is
    negated before it is stored.

    :param problem: object exposing the objective as ``problem.obj_func_min``
    :param num_initial_pts_per_s: number of points evaluated per ``s``; has to
        match the number of rows of ``points_x`` for the horizontal stack to work
    :param points_x: 2D array of the remaining coordinates, one point per row
    :param exp_path: forwarded unchanged to ``obj_func_min.evaluate``
    :param result_path: prefix of the two output files
        (``_initial_samples.txt`` and ``_initial_samples.pickle``)
    :return: HistoricalData holding every (point, negated value, noise) triple
    """
    objective = problem.obj_func_min
    history = HistoricalData(dim=objective.getDim())
    rep_ql = objective.repQL
    lower = objective.getSearchDomain()[0, 0]
    upper = objective.getSearchDomain()[0, 1]

    samples_per_s = []
    for s in np.linspace(lower, upper, num=objective.getNums()):
        seeds = np.random.randint(900, size=num_initial_pts_per_s)
        # Leading column carries the current s, remaining columns are points_x.
        points = np.hstack((s * np.ones((num_initial_pts_per_s, 1)), points_x))
        values = np.zeros(num_initial_pts_per_s)
        noises = np.zeros(num_initial_pts_per_s)
        for row, (pt, seed) in enumerate(zip(points, seeds)):
            raw_value, noises[row] = objective.evaluate(rep_ql, pt, seed, exp_path)
            values[row] = -1.0 * raw_value
        history.append_historical_data(points, values, noises)
        samples_per_s.append(
            np.hstack((points, values.reshape((-1, 1)), noises.reshape((-1, 1))))
        )

    # Human-readable dump next to the binary one.
    with open(result_path + '_initial_samples.txt', "w") as text_file:
        text_file.write(str(samples_per_s))
    with open(result_path + '_initial_samples.pickle', "wb") as pickle_file:
        dump(np.array(samples_per_s), pickle_file)
    return history
def createHistoricalDataGeneral(dim_obj_func_min, listPrevData, mult, indexFirstIS=0):
    '''Merge per-IS datasets into one HistoricalData whose first coordinate is the IS index.

    Args:
        dim_obj_func_min: dim of the obj function, as given in obj_func_min._dim
        listPrevData: list of tuples (data, vals, noise), one tuple per information source (IS)
        mult: factor applied to every value, e.g. -1.0 when the data stems from
            the minimization version but the consumer works on the maximization version
        indexFirstIS: number given to the first IS in listPrevData; the following
            ones are numbered consecutively

    Returns: HistoricalData object of dimension dim_obj_func_min + 1, where the
        additional first column gives the IS the data corresponds to
    '''
    merged = HistoricalData(dim_obj_func_min + 1)
    for offset, dataset in enumerate(listPrevData):
        points = dataset[0]
        # Column of the IS index, one entry per point of this dataset.
        is_column = (indexFirstIS + offset) * numpy.ones((len(points), 1))
        merged.append_historical_data(
            numpy.hstack((is_column, points)),
            mult * numpy.array(dataset[1]),
            dataset[2],
        )
    return merged
def gp_mean_var(
        points_sampled,
        points_to_evaluate,
        rest_host=DEFAULT_HOST,
        rest_port=DEFAULT_PORT,
        testapp=None,
        **kwargs
):
    """Query the REST endpoint for the GP posterior mean and variance.

    :param points_sampled: already sampled points; each entry is turned into a
        ``SamplePoint`` via ``SamplePoint._make``
    :param points_to_evaluate: points at which mean and variance are requested;
        the length of its first entry is used as the dimension of the space
    :param rest_host: host of the REST server
    :param rest_port: port of the REST server
    :param testapp: forwarded to ``call_endpoint_with_payload``
    :param kwargs: extra payload entries ('covariance_info' etc.); explicit
        ``gp_historical_info`` / ``domain_info`` entries are not overwritten
    :return: tuple ``(mean, var)`` taken from the deserialized response
    """
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_MEAN_VAR_ROUTE_NAME]

    # Start from the caller-supplied options and add the evaluation points.
    payload = kwargs.copy()
    payload['points_to_evaluate'] = points_to_evaluate

    # Sanitize input points
    clean_points = [SamplePoint._make(point) for point in points_sampled]
    dim = len(points_to_evaluate[0])
    historical_data = HistoricalData(dim, sample_points=clean_points)

    if 'gp_historical_info' not in payload:
        payload['gp_historical_info'] = historical_data.json_payload()
    if 'domain_info' not in payload:
        payload['domain_info'] = {'dim': dim}

    response = call_endpoint_with_payload(
        rest_host, rest_port, endpoint, json.dumps(payload), testapp)
    result = GpMeanVarResponse().deserialize(response)
    return result.get('mean'), result.get('var')
def sample_initial_data(problem, num_initial_pts_per_IS):
    """Sample the same random points at every information source (IS) to query.

    One set of uniformly random points is drawn from the MOE domain of the
    objective; each IS returned by ``getList_IS_to_query`` is then evaluated at
    all of them. Stored values are the negated evaluations.

    :param problem: object exposing the objective as ``problem.obj_func_min``
    :param num_initial_pts_per_IS: number of random points drawn (and evaluated per IS)
    :return: HistoricalData of dimension ``getDim() + 1``; the extra leading
        coordinate is the index of the IS
    """
    objective = problem.obj_func_min
    points = objective.get_moe_domain().generate_uniform_random_points_in_domain(
        num_initial_pts_per_IS)
    # increased by one for index of IS
    history = HistoricalData(dim=objective.getDim() + 1)

    for IS in objective.getList_IS_to_query():
        is_points = np.hstack((IS * np.ones((num_initial_pts_per_IS, 1)), points))
        values = np.array([-1.0 * objective.evaluate(IS, pt) for pt in points])
        # note: misoKG will learn the noise from sampled data
        noise = np.ones(len(points)) * objective.noise_and_cost_func(IS, None)[0]
        history.append_historical_data(is_points, values, noise)
    return history
def createHistoricalDataForMisoKGDiff(dim_obj_func_min, listPrevData, directory, bias_filename, mult=-1.0):
    """Build one HistoricalData per IS: IS 0 from its samples, the others from pickled bias data.

    This data is only used to train mKG hyperparams, and listPrevData[0] is
    supposed to be the unbiased IS.

    :param dim_obj_func_min: dimension passed to every HistoricalData
    :param listPrevData: list of tuples (points, vals, noise), one per IS
    :param directory: directory that contains the bias pickle
    :param bias_filename: name of the bias pickle without the ``.pickle`` suffix;
        the file must hold a dict with 'points' and 'vals' entries
    :param mult: factor applied to all stored values
    :return: list of HistoricalData, with ``len(listPrevData)`` entries
    """
    # NOTE: pickle.load can run arbitrary code; only point this at trusted files.
    with open("{0}/{1}.pickle".format(directory, bias_filename), "rb") as input_file:
        bias_data = pickle.load(input_file)

    unbiased = HistoricalData(dim_obj_func_min)
    unbiased.append_historical_data(
        listPrevData[0][0],
        mult * numpy.array(listPrevData[0][1]),
        numpy.array(listPrevData[0][2]),
    )
    data_list = [unbiased]

    for is_index in range(1, len(listPrevData)):
        bias_index = is_index - 1
        # Only the first 200 bias samples of each IS are used.
        bias_points = bias_data['points'][bias_index][:200, :]
        bias_vals = bias_data['vals'][bias_index][:200]
        # Noise of a difference: mean noise of IS 0 plus mean noise of this IS.
        combined_noise = numpy.mean(listPrevData[0][2]) + numpy.mean(listPrevData[is_index][2])
        biased = HistoricalData(dim_obj_func_min)
        biased.append_historical_data(
            bias_points,
            mult * numpy.array(bias_vals),
            numpy.ones(len(bias_vals)) * combined_noise,
        )
        data_list.append(biased)
    return data_list
def test_1d_analytic_ei_edge_cases(self):
    """Check analytic EI in cases that would compute 0/0 without variance lower bounds."""
    def check_ei_and_gradient_vanish(evaluator):
        # Both EI and its gradient are expected to be 0 up to 1e-15.
        improvement = evaluator.compute_expected_improvement()
        gradient = evaluator.compute_grad_expected_improvement()
        self.assert_scalar_within_relative(improvement, 0.0, 1.0e-15)
        self.assert_vector_within_relative(gradient, numpy.zeros(gradient.shape), 1.0e-15)

    origin = numpy.array([0.5])
    first_point = SamplePoint(origin, -1.809342, 0)
    second_point = SamplePoint(origin * 2.0, -1.09342, 0)

    # First a symmetric case: only one historical point
    history = HistoricalData(origin.size, [first_point])
    kernel = SquareExponential(numpy.array([0.2, 0.3]))
    gaussian_process = GaussianProcess(kernel, history)

    evaluator = ExpectedImprovement(gaussian_process, origin)
    check_ei_and_gradient_vanish(evaluator)
    self._check_ei_symmetry(
        evaluator, origin, (1.0e-15, 4.0e-11, 3.14e-6, 8.89e-1, 2.71))

    # Now introduce some asymmetry with a second point.
    # Right side has a larger objective value, so the EI minimum
    # is shifted *slightly* to the left of best_so_far.
    gaussian_process.add_sampled_points([second_point])
    left_shift = 3.0e-12
    check_ei_and_gradient_vanish(
        ExpectedImprovement(gaussian_process, origin - left_shift))
def build_random_gaussian_process(points_sampled, covariance, noise_variance=None, gaussian_process_type=GaussianProcess):
    r"""Build a GP whose historical data is drawn, point by point, from the GP itself.

    Starting from a GP without data, a function value is sampled at each row of
    ``points_sampled`` and immediately added back, so later draws are conditioned
    on earlier ones. This is mainly useful for testing or when "random" data is
    needed that will produce reasonably well-behaved GPs.

    :param points_sampled: points at which to draw from the GP
    :type points_sampled: array of float64 with shape (num_sampled, dim)
    :param covariance: covariance function backing the GP
    :type covariance: interfaces.covariance_interface.CovarianceInterface subclass composable with gaussian_process_type
    :param noise_variance: the ``\sigma_n^2`` (noise variance) associated w/the new observations;
        all zeros when omitted
    :type noise_variance: array of float64 with shape (num_sampled)
    :param gaussian_process_type: gaussian process whose historical data is being set
    :type gaussian_process_type: interfaces.gaussian_process_interface.GaussianProcessInterface subclass
    :return: a gaussian process with the generated prior data
    :rtype: gaussian_process_type object

    """
    num_sampled, dim = points_sampled.shape[0], points_sampled.shape[1]
    if noise_variance is None:
        noise_variance = numpy.zeros(num_sampled)

    process = gaussian_process_type(covariance, HistoricalData(dim))
    for index, location in enumerate(points_sampled):
        point_noise = noise_variance[index]
        # Draw function value from the GP, then feed it back in.
        drawn_value = process.sample_point_from_gp(location, noise_variance=point_noise)
        process.add_sampled_points([SamplePoint(location, drawn_value, point_noise)])
    return process
class Experiment(object):

    """A class for MOE optimizable experiments."""

    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )

    def build_json_payload(self):
        """Construct a json serializable and MOE REST recognizable dictionary of the experiment.

        :return: dict with the keys ``domain_info`` and ``gp_historical_info``
        """
        return {
            'domain_info': self.domain.get_json_serializable_info(),
            'gp_historical_info': self.historical_data.json_payload(),
        }

    def __str__(self):
        """Return a pprint formatted version of the experiment dict."""
        # The payload has to be built (called); formatting the bound method
        # itself would only print its repr.
        return pprint.pformat(self.build_json_payload())
def test_sample_point_from_gp(self):
    """Test that sampling points from the GP works.

    Two properties are checked: drawing again after resetting to the most
    recent seed reproduces the earlier draws exactly, and sampling at a
    historical point that had 0 noise returns the value stored for that point.
    """
    point_one = SamplePoint([0.0, 1.0], -1.0, 0.0)
    point_two = SamplePoint([2.0, 2.5], 1.0, 0.1)
    covariance = SquareExponential([1.0, 1.0, 1.0])
    historical_data = HistoricalData(len(point_one.point), [point_one, point_two])

    gaussian_process = GaussianProcess(covariance, historical_data)

    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # and for three iterations the two are interchangeable on Python 2.
    out_values = numpy.zeros(3)
    for i in range(3):
        out_values[i] = gaussian_process.sample_point_from_gp(point_two.point, 0.001)

    gaussian_process._gaussian_process.reset_to_most_recent_seed()
    # Initialised to ones (not zeros) so an untouched entry cannot match by accident.
    out_values_test = numpy.ones(3)
    for i in range(3):
        out_values_test[i] = gaussian_process.sample_point_from_gp(point_two.point, 0.001)

    # Exact match b/c we should've run over the exact same computations
    self.assert_vector_within_relative(out_values_test, out_values, 0.0)

    # Sampling from a historical point (that had 0 noise) should produce the same value associated w/that point
    value = gaussian_process.sample_point_from_gp(point_one.point, 0.0)
    self.assert_scalar_within_relative(value, point_one.value, numpy.finfo(numpy.float64).eps)
class Experiment(object):

    """A class for MOE optimizable experiments."""

    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )

    def build_json_payload(self):
        """Construct a json serializable and MOE REST recognizable dictionary of the experiment.

        :return: dict with the keys ``domain_info`` and ``gp_historical_info``
        """
        return {
            'domain_info': self.domain.get_json_serializable_info(),
            'gp_historical_info': self.historical_data.json_payload(),
        }

    def __str__(self):
        """Return a pprint formatted version of the experiment dict."""
        # Call build_json_payload: passing the bound method to pformat would
        # print the method's repr instead of the payload.
        return pprint.pformat(self.build_json_payload())
def _make_gp_from_params(params):
    """Create and return a gaussian_process built from the request params dict.

    ``params`` has the following form::

        params = {
            'gp_historical_info': <instance of :class:`moe.views.schemas.base_schemas.GpHistoricalInfo`>,
            'domain_info': <instance of :class:`moe.views.schemas.base_schemas.DomainInfo`>,
            'covariance_info': <instance of :class:`moe.views.schemas.base_schemas.CovarianceInfo`>,
        }

    When ``params['optimizer_info']['optimizer_type']`` equals ``L_BFGS_B_OPTIMIZER``
    a ``pythonGaussianProcess`` with a "python" covariance is returned; in every
    other case (including a missing ``optimizer_info``) a ``GaussianProcess`` is.

    :param params: The request params dict
    :type params: dict
    :return: the gaussian process holding the points from ``gp_historical_info``

    """
    # Load up the info
    gp_historical_info = params.get("gp_historical_info")
    domain_info = params.get("domain_info")

    sample_point_list = [
        SamplePoint(entry['point'], entry['value'], entry['value_var'])
        for entry in gp_historical_info.get('points_sampled')
    ]

    optimizer_type = params.get('optimizer_info', {}).get('optimizer_type', None)
    if optimizer_type == L_BFGS_B_OPTIMIZER:
        covariance_of_process = _make_covariance_of_process_from_params(params, "python")
        process_class = pythonGaussianProcess
    else:
        covariance_of_process = _make_covariance_of_process_from_params(params)
        process_class = GaussianProcess

    return process_class(
        covariance_of_process,
        HistoricalData(domain_info.get('dim'), sample_point_list),
    )
def gp_hyper_opt(points_sampled, rest_host=DEFAULT_HOST, rest_port=DEFAULT_PORT, testapp=None, **kwargs):
    """Query the REST endpoint that optimizes the hyperparameters of a gaussian process.

    :param points_sampled: non-empty sequence of already sampled points; each
        entry is turned into a ``SamplePoint`` and the length of the first
        entry's point gives the GP dimension
    :param rest_host: host of the REST server
    :param rest_port: port of the REST server
    :param testapp: forwarded to ``call_endpoint_with_payload``
    :param kwargs: extra payload entries; ``domain_info``, ``gp_historical_info``
        and ``hyperparameter_domain_info`` are only filled in when absent
    :return: the ``covariance_info`` entry of the deserialized response
    """
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_HYPER_OPT_ROUTE_NAME]
    # This will fail if len(points_sampled) == 0; but then again this endpoint doesn't make sense with 0 historical data
    gp_dim = len(points_sampled[0][0])
    payload = kwargs.copy()

    # Sanitize input points
    clean_points = [SamplePoint._make(point) for point in points_sampled]
    historical_data = HistoricalData(gp_dim, sample_points=clean_points)

    if 'domain_info' not in payload:
        payload['domain_info'] = {'dim': gp_dim}
    if 'gp_historical_info' not in payload:
        payload['gp_historical_info'] = historical_data.json_payload()
    if 'hyperparameter_domain_info' not in payload:
        # default covariance has this many parameters
        hyper_dim = gp_dim + 1
        default_bound = {'min': 0.1, 'max': 2.0}
        payload['hyperparameter_domain_info'] = {
            'dim': hyper_dim,
            'domain_bounds': [default_bound] * hyper_dim,
        }

    response = call_endpoint_with_payload(
        rest_host, rest_port, endpoint, json.dumps(payload), testapp)
    result = GpHyperOptResponse().deserialize(response)
    return result['covariance_info']
def test_gp_construction_singular_covariance_matrix(self):
    """Check that the GaussianProcess ctor reports a singular covariance matrix for duplicate points with 0 noise."""
    # First test environment whose num_sampled entry is at least 1.
    env_index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 1))
    domain, reference_gp = self.gp_test_environments[env_index]
    dim = domain.dim

    distinct_point = SamplePoint([0.0] * dim, 1.0, 0.0)
    # The same point is used twice (duplicate coordinates) with noise_variance = 0.0
    repeated_point = SamplePoint([1.0] * dim, 1.0, 0.0)
    history = HistoricalData(
        len(distinct_point.point),
        [distinct_point, repeated_point, repeated_point],
    )

    T.assert_raises(
        C_GP.SingularMatrixException,
        GaussianProcess,
        reference_gp.get_covariance_copy(),
        history,
    )
def load_sample_data(problem, num_per_var, exp_path, result_path):
    """Rebuild HistoricalData from ``result_path + '_initial_samples.pickle'``.

    The pickle is expected to be indexable by block number, each block being a
    2D array whose last two columns are value and noise and whose remaining
    columns are the point coordinates. The first ``getNums()`` blocks are loaded.

    :param problem: object exposing the objective as ``problem.obj_func_min``
    :param num_per_var: not used by the loading logic; kept so callers need not change
    :param exp_path: not used by the loading logic; kept so callers need not change
    :param result_path: prefix of the pickle file to read
    :return: HistoricalData containing all loaded (point, value, noise) triples
    :raise IndexError: if the pickle holds fewer blocks than ``getNums()``
    """
    # NOTE: pickle.load can execute arbitrary code; only load files this
    # project wrote itself.
    with open(result_path + '_initial_samples.pickle', 'rb') as pickle_file:
        samples_per_s = pickle.load(pickle_file)

    objective = problem.obj_func_min
    history = HistoricalData(dim=objective.getDim())
    # Only the number of blocks matters here, so no grid of s values is
    # rebuilt just to count iterations.
    for block_index in range(objective.getNums()):
        block = samples_per_s[block_index]
        history.append_historical_data(block[:, 0:-2], block[:, -2], block[:, -1])
    return history
def gp_hyper_opt(
        points_sampled,
        rest_host=DEFAULT_HOST,
        rest_port=DEFAULT_PORT,
        testapp=None,
        **kwargs
):
    """Ask the REST endpoint to optimize the hyperparameters of a gaussian process.

    :param points_sampled: non-empty sequence of already sampled points; the
        length of the first entry's point is taken as the GP dimension
    :param rest_host: host of the REST server
    :param rest_port: port of the REST server
    :param testapp: forwarded to ``call_endpoint_with_payload``
    :param kwargs: extra payload entries; defaults for ``domain_info``,
        ``gp_historical_info`` and ``hyperparameter_domain_info`` are added
        only for keys the caller did not supply
    :return: the ``covariance_info`` entry of the deserialized response
    """
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_HYPER_OPT_ROUTE_NAME]
    # This will fail if len(points_sampled) == 0; but then again this endpoint doesn't make sense with 0 historical data
    gp_dim = len(points_sampled[0][0])
    request = kwargs.copy()

    # Sanitize input points
    sanitized = [SamplePoint._make(point) for point in points_sampled]
    history = HistoricalData(gp_dim, sample_points=sanitized)

    if 'domain_info' not in request:
        request['domain_info'] = {'dim': gp_dim}
    if 'gp_historical_info' not in request:
        request['gp_historical_info'] = history.json_payload()
    if 'hyperparameter_domain_info' not in request:
        hyper_dim = gp_dim + 1  # default covariance has this many parameters
        bounds = [{'min': 0.1, 'max': 2.0}] * hyper_dim
        request['hyperparameter_domain_info'] = {'dim': hyper_dim, 'domain_bounds': bounds}

    json_request = json.dumps(request)
    json_response = call_endpoint_with_payload(rest_host, rest_port, endpoint, json_request, testapp)
    return GpHyperOptResponse().deserialize(json_response)['covariance_info']
def __init__(self, domain_bounds, points_sampled=None):
    """Construct a MOE optimizable experiment.

    **Required arguments:**

        :param domain_bounds: The bounds for the optimization experiment
        :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

    **Optional arguments:**

        :param points_sampled: The historic points sampled and their objective function values
        :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

    """
    # One closed interval per dimension, in the order given.
    intervals = []
    for bound in domain_bounds:
        intervals.append(ClosedInterval(bound[0], bound[1]))

    self.domain = TensorProductDomain(intervals)
    self.historical_data = HistoricalData(self.domain.dim, sample_points=points_sampled)
def createHistoricalDataForMisoEI(dim_obj_func_min, listPrevData, directory, bias_filename):
    """Build one HistoricalData per IS with the noise inflated by the squared mean bias.

    Note: since misoEI uses a notion of fidelity variance, it is set to
    noise_var + bias^2, where bias is the mean of the corresponding ``'vals'``
    entry of the pickled bias data. The first IS gets a bias of 0.

    :param dim_obj_func_min: dimension passed to every HistoricalData
    :param listPrevData: list of tuples (points, vals, noise), one per IS
    :param directory: directory that contains the bias pickle
    :param bias_filename: name of the bias pickle without the ``.pickle`` suffix
    :return: tuple ``(data_list, bias_sq_list)``: the HistoricalData objects and
        the squared biases that were added to the noise
    """
    # NOTE: pickle.load can run arbitrary code; only point this at trusted files.
    with open("{0}/{1}.pickle".format(directory, bias_filename), "rb") as input_file:
        bias_data = pickle.load(input_file)

    mean_biases = [
        numpy.mean(bias_data['vals'][i]) for i in range(len(listPrevData) - 1)
    ]
    # Leading 0. is the bias of the first IS.
    bias_sq_list = numpy.power(numpy.concatenate(([0.], mean_biases)), 2.0)

    data_list = []
    for dataset, bias_sq in zip(listPrevData, bias_sq_list):
        history = HistoricalData(dim_obj_func_min)
        history.append_historical_data(
            dataset[0], dataset[1], numpy.array(dataset[2]) + bias_sq)
        data_list.append(history)
    return data_list, bias_sq_list
def __init__(self, domain_bounds, points_sampled=None):
    """Construct a MOE optimizable experiment.

    **Required arguments:**

        :param domain_bounds: The bounds for the optimization experiment
        :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

    **Optional arguments:**

        :param points_sampled: The historic points sampled and their objective function values
        :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

    """
    closed_intervals = []
    for bound in domain_bounds:
        lower, upper = bound[0], bound[1]
        closed_intervals.append(ClosedInterval(lower, upper))

    domain = TensorProductDomain(closed_intervals)
    self.domain = domain
    # The history is created with the dimension of the domain just built.
    self.historical_data = HistoricalData(
        self.domain.dim,
        sample_points=points_sampled,
    )
def get_random_gp_data(space_dim, num_is, num_data_each_is, kernel_name):
    """Generate random gp data by sampling from a GP with random hyperparameters.

    Points live in ``space_dim + 1`` dimensions: the leading coordinate is set
    to the index of the information source (IS), the remaining ones are drawn
    uniformly from ``[-10, 10]``. Every sampled value is appended to the data
    and the GP is rebuilt, so later samples are conditioned on earlier ones.

    :param space_dim: dimension of the search space without the IS coordinate
    :param num_is: number of information sources sampled at each random point
    :param num_data_each_is: number of random points drawn (each is sampled once per IS)
    :param kernel_name: currently it's either 'mix_exp' or 'prod_ker'
    :return: tuple ``(hyper_params, data)`` with the random hyperparameters and
        the HistoricalData that was generated
    :raise NotImplementedError: if ``kernel_name`` is not one of the two supported names
    """
    sample_var = 0.01
    if kernel_name == "mix_exp":
        hyper_params = numpy.random.uniform(size=(num_is + 1) * (space_dim + 1))
        cov = MixedSquareExponential(hyper_params, space_dim + 1, num_is)
    elif kernel_name == "prod_ker":
        # Floor division keeps ``size`` an integer under true division; the
        # product of two consecutive integers is even, so nothing is lost.
        num_hyper_params = (num_is + 1) * (num_is + 2) // 2 + space_dim + 1
        hyper_params = numpy.random.uniform(size=num_hyper_params)
        cov = ProductKernel(hyper_params, space_dim + 1, num_is + 1)
    else:
        raise NotImplementedError("invalid kernel")

    python_search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in numpy.repeat([[-10., 10.]], space_dim + 1, axis=0)
    ])

    # Seed the GP with two zero-valued points on IS 0 so it can be sampled from.
    data = HistoricalData(space_dim + 1)
    init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
    init_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(init_pts, numpy.zeros(2), numpy.ones(2) * sample_var)
    gp = GaussianProcess(cov, data)

    points = python_search_domain.generate_uniform_random_points_in_domain(num_data_each_is)
    for pt in points:
        for i in range(num_is):
            # The same location is re-labelled with each IS index in turn.
            pt[0] = i
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([[pt, val, sample_var], ])
            gp = GaussianProcess(cov, data)
    return hyper_params, data
def generate_data(self, num_data):
    """Sample ``num_data`` values from the GP defined by ``self._cov`` at random points.

    The leading coordinate of every point is replaced by the ceiling of a
    uniform draw with ``high=2.0``. Points whose leading coordinate equals 1
    are sampled with ``self._sample_var_1``, all others with
    ``self._sample_var_2``. After each sample the GP is rebuilt on the
    enlarged data.

    :param num_data: number of random points to sample
    :return: HistoricalData with the two seed points and all sampled points
    """
    info = self._info_dict
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in info['search_domain']])

    # Two zero-valued seed points with leading coordinate 0 start the GP off.
    data = HistoricalData(info['dim'])
    seed_pts = search_domain.generate_uniform_random_points_in_domain(2)
    seed_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(
        seed_pts, numpy.zeros(2), numpy.ones(2) * self._sample_var_1)
    gp = GaussianProcess(self._cov, data)

    candidates = search_domain.generate_uniform_random_points_in_domain(num_data)
    for pt in candidates:
        pt[0] = numpy.ceil(numpy.random.uniform(high=2.0, size=1))
        if pt[0] == 1:
            sample_var = self._sample_var_1
        else:
            sample_var = self._sample_var_2
        val = gp.sample_point_from_gp(pt, sample_var)
        data.append_sample_points([[pt, val, sample_var], ])
        gp = GaussianProcess(self._cov, data)
    return data
# Indices of the observed outputs: 0 plus each derivative index shifted by one.
# NOTE(review): presumably column 0 is the function value and column i + 1 the
# i-th derivative -- confirm against objective_func.evaluate.
observations = [0] + [i + 1 for i in derivatives]
# Evaluations at the initial points (the column selection is currently disabled).
init_pts_value = np.array(
    [objective_func.evaluate(pt) for pt in init_pts]
)  # [:, observations]
# Same points evaluated with evaluate_true.
true_value_init = np.array(
    [objective_func.evaluate_true(pt) for pt in init_pts]
)  # [:, observations]
# Collecting Data
# The reshape to (initial_n, 1) only succeeds when each evaluation yields a
# single number per point.
s_suggest = np.array(init_pts)
f_s_suggest = np.array(init_pts_value).reshape(initial_n, 1)
s_recommend = np.array(init_pts)
f_s_recommend = np.array(true_value_init).reshape(initial_n, 1)
# One slot per initial point plus one per iteration.
elapsed = np.zeros([1, num_iteration + initial_n])
init_data = HistoricalData(dim=objective_func._dim,
                           num_derivatives=len(derivatives))
# Every initial point is stored with its selected observations and the
# sample variance of the objective.
init_data.append_sample_points(
    [
        SamplePoint(
            pt,
            [init_pts_value[num, i] for i in observations],
            objective_func._sample_var,
        )
        for num, pt in enumerate(init_pts)
    ]
)
# initialize the model
prior = DefaultPrior(1 + dim + len(observations), len(observations))
# noisy = False means the underlying function being optimized is noise-free
def test(self):
    """Exercise the pickle load helpers and the historical-data helpers in one long test.

    Sections, in order: load_data_from_a_min_problem(), a placeholder for
    obtainHistoricalDataForEGO(), createHistoricalDataForKG() together with
    findBestSampledValue(), and findBestSampledValueFromHistoricalData().

    NOTE(review): many assertions below pass ``(...).all`` to assertTrue
    without calling it. The attribute is looked up but never invoked, so
    assertTrue receives the method object rather than the result of the
    comparison. Those checks should be re-validated before being relied on.
    """
    rb = RosenbrockVanilla()
    # NOTE(review): func_name is not used anywhere below.
    func_name = rb.getFuncName()
    pathToPickles = 'picklesTest'

    ### Test load_data_from_a_min_problem()
    name_testfile = 'load_and_store_Test'
    samples = numpy.array([[[1, 1], [1, 2]]])
    #print samples
    values = [[1.0, 2.0]]
    data = {"points": samples, "vals": values}

    with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
              "wb") as output_file:
        pickle.dump(data, output_file)
    loaded_pts, loaded_vals = load_data_from_a_min_problem(
        pathToPickles, name_testfile)
    for index in range(len(samples)):
        self.assertTrue((samples[index] == loaded_pts[index]).all)
    for index in range(len(values)):
        self.assertTrue((values[index] == loaded_vals[index]))

    # test overwriting
    # NOTE(review): ``data`` is not rebuilt after ``samples`` is reassigned, so
    # the file written below still contains the first samples array.
    samples = numpy.array([[[1, 4], [1, 2]]])
    with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
              "wb") as output_file:
        pickle.dump(data, output_file)
    loaded_pts, loaded_vals = load_data_from_a_min_problem(
        pathToPickles, name_testfile)
    for index in range(len(samples)):
        self.assertTrue((samples[index] == loaded_pts[index]).all)
    for index in range(len(values)):
        self.assertTrue((values[index] == loaded_vals[index]))

    ### Test obtainHistoricalDataForEGO()
    #TODO come up with tests for these functions
    # Only the pickle is written for now; the calls themselves are disabled.
    list_IS_to_query = [0]
    num_init_pts_each_IS = 10
    name_testfile = rb.getFuncName() + '_' + 'IS_' + '_'.join(
        str(element) for element in list_IS_to_query) + '_' + str(
            num_init_pts_each_IS) + "_points_each"
    with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
              "wb") as output_file:
        pickle.dump(data, output_file)
    # testHistoricalData = obtainHistoricalDataForEGO(True, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
    # print testHistoricalData
    #
    # testHistoricalDataRandom = obtainHistoricalDataForEGO(False, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
    # print testHistoricalDataRandom

    ### Test createHistoricalDataForKG()
    listPrevData = []
    samples = [[1, 1], [1, 2]]
    values = [1.0, 2.0]
    list_noise_variance_at_sample = [0.1, 0.3]
    listPrevData.append((samples, values, list_noise_variance_at_sample))

    hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
    #print hist_kg
    # Expected points: the samples with a leading IS index of 0.
    IS_samples = [[0, 1, 1], [0, 1, 2]]
    for index in range(len(hist_kg.points_sampled)):
        self.assertTrue(
            (IS_samples[index] == hist_kg.points_sampled[index]).all)
    for index in range(len(hist_kg.points_sampled_value)):
        self.assertTrue(
            (values[index] == hist_kg.points_sampled_value[index]).all)

    # hist_kg is not recomputed here, so this loop repeats the earlier check.
    samples = [[0, 0], [4, 3]]
    for index in range(len(hist_kg.points_sampled)):
        self.assertTrue(
            (IS_samples[index] == hist_kg.points_sampled[index]).all)

    listPrevData = [(samples, values, list_noise_variance_at_sample)]
    bestpt, bestval, best_truth = findBestSampledValue(rb, listPrevData, 0)
    # print findBestSampledValue(rb, listPrevData, 0)
    # The next two assertions check the same thing twice.
    self.assertAlmostEqual(bestval, 1.0, delta=.0001)
    self.assertAlmostEqual(bestval, 1.0, delta=0.0001)
    # self.assertAlmostEqual(bestval, 1.0, delta=0.0001)
    self.assertAlmostEqual(best_truth, numpy.float64(-9.0), delta=1.0)
    self.assertTrue((bestpt == [0.0, 0.0]))

    # The dicts below are built by the test itself and then compared against
    # the very lists they were built from.
    list_sampled_IS = [0, 0]
    gathered_data_from_all_replications = []
    gathered_data_from_all_replications.append({
        "points": samples,
        "vals": values,
        "noise_variance": list_noise_variance_at_sample,
        "sampledIS": list_sampled_IS
    })
    for indexList in range(len(gathered_data_from_all_replications)):
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['vals'])):
            self.assertAlmostEqual(
                values[indexElem],
                gathered_data_from_all_replications[indexList]['vals']
                [indexElem],
                delta=0.0001)
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['points'])):
            self.assertTrue(samples[indexElem] ==
                            gathered_data_from_all_replications[indexList]
                            ['points'][indexElem])
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['sampledIS'])):
            self.assertTrue(list_sampled_IS[indexElem] ==
                            gathered_data_from_all_replications[indexList]
                            ['sampledIS'][indexElem])

    # Second replication with the same content.
    gathered_data_from_all_replications.append({
        "points": samples,
        "vals": values,
        "noise_variance": list_noise_variance_at_sample,
        "sampledIS": list_sampled_IS
    })
    for indexList in range(len(gathered_data_from_all_replications)):
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['vals'])):
            self.assertAlmostEqual(
                values[indexElem],
                gathered_data_from_all_replications[indexList]['vals']
                [indexElem],
                delta=0.0001)
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['points'])):
            self.assertTrue(samples[indexElem] ==
                            gathered_data_from_all_replications[indexList]
                            ['points'][indexElem])
        for indexElem in range(
                len(gathered_data_from_all_replications[indexList]
                    ['sampledIS'])):
            self.assertTrue(list_sampled_IS[indexElem] ==
                            gathered_data_from_all_replications[indexList]
                            ['sampledIS'][indexElem])

    # Third replication with different content; only index 2 is checked.
    samples = [[-1., 0], [0.1, -2.0]]
    values = [0.2, 1.5]
    list_sampled_IS = [3, 3]
    gathered_data_from_all_replications.append({
        "points": samples,
        "vals": values,
        "noise_variance": list_noise_variance_at_sample,
        "sampledIS": list_sampled_IS
    })
    for indexElem in range(
            len(gathered_data_from_all_replications[2]['vals'])):
        self.assertAlmostEqual(
            values[indexElem],
            gathered_data_from_all_replications[2]['vals'][indexElem],
            delta=0.0001)
    for indexElem in range(
            len(gathered_data_from_all_replications[2]['points'])):
        self.assertTrue(
            samples[indexElem] == gathered_data_from_all_replications[2]
            ['points'][indexElem])
    for indexElem in range(
            len(gathered_data_from_all_replications[2]['sampledIS'])):
        self.assertTrue(
            list_sampled_IS[indexElem] ==
            gathered_data_from_all_replications[2]['sampledIS'][indexElem])

    # Add the third replication as a second dataset and rebuild the KG data.
    listPrevData.append(
        (gathered_data_from_all_replications[2]['points'],
         gathered_data_from_all_replications[2]['vals'],
         gathered_data_from_all_replications[2]['noise_variance']))
    hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
    #print hist_kg
    self.assertTrue((hist_kg.points_sampled[0] == [0, 0, 0]).all)
    self.assertTrue((hist_kg.points_sampled[1] == [0, 4, 3]).all)
    self.assertTrue((hist_kg.points_sampled[2] == [1, -1.0, 0]).all)
    self.assertTrue((hist_kg.points_sampled[3] == [1, .1, -2]).all)
    # These assertions expect the stored values to be the negated inputs.
    self.assertAlmostEqual(values[0],
                           -1.0 * hist_kg.points_sampled_value[2],
                           delta=0.0001)
    self.assertAlmostEqual(values[1],
                           -1.0 * hist_kg.points_sampled_value[3],
                           delta=0.0001)
    self.assertAlmostEqual(list_noise_variance_at_sample[0],
                           hist_kg.points_sampled_noise_variance[2],
                           delta=0.0001)
    self.assertAlmostEqual(list_noise_variance_at_sample[1],
                           hist_kg.points_sampled_noise_variance[3],
                           delta=0.0001)

    ### Test for findBestSampledValueFromHistoricalData()
    atoext = AssembleToOrderExtended(mult=-1.0)
    hd = HistoricalData(atoext.getDim())
    pts = numpy.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 0.2, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4]])
    self.assertTrue(len(pts) == 2)
    self.assertTrue(len(pts[0]) == atoext.getDim())
    self.assertTrue(len(pts[1]) == atoext.getDim())
    vals = [-1.0, 0.2]
    noises = [0.1, 0.2]
    hd.append_historical_data(pts, vals, noises)
    # print hd.to_list_of_sample_points()

    bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
        atoext, hd)
    # print bestpt
    # print best_val
    # print best_truth
    self.assertTrue((pts[0] == bestpt).all)
    self.assertTrue(best_val == -1.0)
    self.assertAlmostEqual(best_truth,
                           atoext.evaluate(2, bestpt),
                           delta=10.0)

    # Append a point with a smaller value; it is expected to become the best.
    pts = numpy.array([[1.3, 1.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                       [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
    vals = [-11.0, 10.2]
    noises = [10.1, 1000.2]
    hd.append_historical_data(pts, vals, noises)
    bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
        atoext, hd)
    self.assertTrue((pts[0] == bestpt).all)
    self.assertTrue(best_val == -11.0)

    # NOTE(review): pts2 is built but never used; the append below passes
    # ``pts`` again. Confirm whether pts2 was intended.
    pts2 = numpy.array([[10.3, 10.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                        [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
    vals = [11.0, 10.2]
    hd.append_historical_data(pts, vals, noises)
    bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
        atoext, hd)
    self.assertTrue((pts[0] == bestpt).all)
    self.assertTrue(best_val ==
                    -11.0)
def main():
    """Run the qEI-driven optimisation loop configured from the command line.

    Steps, in order:

    1. Parse the docopt arguments (mesh, weight file, initial centroid, coil,
       output file and the optional tuning flags).
    2. Build the ``FieldFunc`` objective and a 3-dimensional search domain
       (two intervals taken from ``f.bounds`` plus ``[0, 180]``).
    3. Seed a ``GaussianProcessLogLikelihoodMCMC`` model with ``cpus``
       uniformly sampled points, storing the *negated* objective values.
    4. Repeatedly pick new points with ``gen_sample_from_qei``, evaluate
       them, retrain the model and track the smallest sampled value.
    5. Stop after ``num_iters`` iterations, or earlier when the summed
       absolute deviation between the last ``min_samps`` best values and the
       current best value drops below ``tol`` (unless convergence checking
       is skipped).
    6. Write the best coordinate to ``output_file`` with ``np.savetxt``.

    Numeric options fall back to their defaults both when the option is
    absent (``None``) and when it evaluates to zero.
    """
    args = docopt(__doc__)

    # Parse arguments
    mesh = args['<mesh>']
    weights = np.load(args['<weightfile>'])
    init_centroid = np.genfromtxt(args['<init_centroid>'])
    coil = args['<coil>']
    output_file = args['<output_file>']
    # ``int(None)`` raises TypeError, so the None fallback has to happen
    # *before* the conversion; the trailing ``or`` keeps the historical
    # behaviour of mapping an explicit 0 to the default as well.
    cpus = int(args['--cpus'] or 0) or 8
    tmpdir = args['--tmp-dir'] or os.getenv('TMPDIR') or "/tmp/"
    num_iters = int(args['--n-iters'] or 0) or 50
    min_samps = int(args['--min-var-samps'] or 0) or 10
    tol = float(args['--convergence'] or 0) or 0.001
    history = args['--history']
    skip_convergence = args['--skip-convergence']
    options = args['--options']

    if options:
        with open(options, 'r') as f:
            opts = json.load(f)
        logging.info("Using custom options file {}".format(options))
        # NOTE(review): entries are joined with a single quote character;
        # a newline separator may have been intended -- confirm.
        logging.info("{}".format('\''.join(
            [f"{k}:{v}" for k, v in opts.items()])))
    else:
        opts = {}

    logging.info('Using {} cpus'.format(cpus))

    f = FieldFunc(mesh_file=mesh,
                  initial_centroid=init_centroid,
                  tet_weights=weights,
                  coil=coil,
                  field_dir=tmpdir,
                  cpus=cpus,
                  **opts)

    # Make search domain (built twice: once per domain implementation)
    search_domain = TensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    c_search_domain = cTensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    # Generate historical points
    prior = DefaultPrior(n_dims=3 + 2, num_noise=1)
    prior.tophat = TophatPrior(-2, 5)
    prior.ln_prior = NormalPrior(12.5, 1.6)
    hist_pts = cpus
    init_pts = search_domain.generate_uniform_random_points_in_domain(hist_pts)
    # The objective is negated so that the loop below can minimise.
    observations = -f.evaluate(init_pts)
    hist_data = HistoricalData(dim=3, num_derivatives=0)
    hist_data.append_sample_points(
        [SamplePoint(inp, o, 0.0) for o, inp in zip(observations, init_pts)])

    # Train GP model
    gp_ll = GaussianProcessLogLikelihoodMCMC(historical_data=hist_data,
                                             derivatives=[],
                                             prior=prior,
                                             chain_length=1000,
                                             burnin_steps=2000,
                                             n_hypers=2**4,
                                             noisy=False)
    gp_ll.train()

    # Initialize grad desc params
    sgd_params = cGDParams(num_multistarts=200,
                           max_num_steps=50,
                           max_num_restarts=5,
                           num_steps_averaged=4,
                           gamma=0.7,
                           pre_mult=1.0,
                           max_relative_change=0.5,
                           tolerance=1.0e-10)

    num_samples = int(cpus * 1.3)
    best_point_history = []

    # Sum of errors buffer: holds the best value of the last ``min_samps``
    # iterations for the convergence check.
    var_buffer = deque(maxlen=min_samps)
    for i in range(num_iters):

        # Optimize qEI and pick samples
        points_to_sample, ei = gen_sample_from_qei(gp_ll.models[0],
                                                   c_search_domain,
                                                   sgd_params=sgd_params,
                                                   num_samples=num_samples,
                                                   num_mc=2**10)

        # Collect observations
        sampled_points = -f.evaluate(points_to_sample)
        evidence = [
            SamplePoint(c, v, 0.0)
            for c, v in zip(points_to_sample, sampled_points)
        ]

        # Update model
        gp_ll.add_sampled_points(evidence)
        gp_ll.train()

        # Pull model and pull values
        gp = gp_ll.models[0]
        min_point = np.argmin(gp._points_sampled_value)
        min_val = np.min(gp._points_sampled_value)
        best_coord = gp.get_historical_data_copy().points_sampled[min_point]

        logging.info('Iteration {} of {}'.format(i, num_iters))
        logging.info('Recommended Points:')
        logging.info(points_to_sample)
        logging.info('Expected Improvement: {}'.format(ei))
        logging.info('Current Best:')
        logging.info(f'f(x*)= {min_val}')
        logging.info(f'Coord: {best_coord}')
        best_point_history.append(str(min_val))

        if history:
            # The whole history is rewritten every iteration.
            with open(history, 'w') as buf:
                buf.write('\n'.join(best_point_history))

        # Convergence check: only once the buffer is full
        if (len(var_buffer) == var_buffer.maxlen) and not skip_convergence:
            deviation = sum([abs(x - min_val) for x in var_buffer])
            if deviation < tol:
                logging.info('Convergence reached!')
                logging.info('Deviation: {}'.format(deviation))
                logging.info('History length: {}'.format(var_buffer.maxlen))
                logging.info('Tolerance: {}'.format(tol))
                break

        var_buffer.append(min_val)

    # Save position and orientation matrix
    np.savetxt(output_file, best_coord)
noise_and_cost_func = obj_func_min.noise_and_cost_func

# Load the initial points and their observed values for every information
# source (IS) from the pickle directory.
init_pts = load_init_points_for_all_IS("pickles", init_data_pickle_filename,
                                       obj_func_min._numIS)
init_vals = load_vals("pickles", init_data_pickle_filename,
                      obj_func_min._numIS)
#init_pts, init_vals = sample_initial_points.load_data_from_a_min_problem("pickles", init_data_pickle_filename)

# Container for the benchmark results.
multi_kg_result = BenchmarkResult(num_iterations, obj_func_max._dim,
                                  benchmark_result_table_name)

# Hyperparameters: column-wise mean over all rows of the SQL table.
kg_hyper_param = pandas.read_sql_table(
    'multifidelity_kg_hyperparam_' + func_name,
    sql_util.sql_engine).mean(axis=0).values

# One extra leading dimension holds the index of the IS.
kg_data = HistoricalData(obj_func_max._dim + 1)
best_sampled_val = numpy.inf
for i in range(obj_func_max._num_IS):
    # Prepend a column holding the 1-based IS index to the points of IS i.
    IS_pts = numpy.hstack(
        (numpy.ones(len(init_pts[i])).reshape((-1, 1)) * (i + 1),
         init_pts[i]))

    # The stored values are assumed to stem from the minimization version
    # of the problem, whereas misoKG works on the maximization version:
    # flip the sign.
    vals = numpy.array(init_vals[i]) * -1.0

    # Noise variance of each point (what used to be sample_vars).
    noise_vars = numpy.array(
        [noise_and_cost_func(i + 1, pt)[0] for pt in init_pts[i]])

    kg_data.append_historical_data(IS_pts, vals, noise_vars)

    # find the best initial value
# separate hypers for GP and for observational noise print "misoKG: repl {0}, itr {1}, best hyper: {2}".format( problem.replication_no, kg_iteration, best_hyper) ### Format: IS 0: signal variance and length scales, IS 1: signal variance and length scales, etc. ### Then observational noise for IS 0, IS 1 etc. hyperparameters_noise = numpy.power(best_hyper[-num_IS:], 2.0) hypers_GP = best_hyper[:-num_IS] # update noise in historical data updated_points_sampled_noise_variance = create_array_points_sampled_noise_variance( current_hist_data.points_sampled, hyperparameters_noise) # create new Historical data object with updated values new_historical_data = HistoricalData( dim=problem.obj_func_min.getDim() + 1) # increased by one for index of IS new_historical_data.append_historical_data( current_hist_data.points_sampled, current_hist_data.points_sampled_value, updated_points_sampled_noise_variance) # Use new hyperparameters -- this requires instantiating a new GP object kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hypers_GP) kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, new_historical_data, num_IS_in=problem.num_is_in) # kg_cov_cpp is not used afterwards ### Find IS and point that maximize KG/cost discretization_points = problem.obj_func_min.get_moe_domain(
def obtainHistoricalDataForEGO(load_historical_data_from_pickle,
                               obj_func_min,
                               directoryToPickles,
                               list_IS_to_query,
                               num_init_pts_each_IS,
                               init_data_pickle_filename=''):
    '''
    Assemble the HistoricalData object with the initial observations for EGO.

    Whenever the truth IS (obj_func_min.getTruthIS()) is one of the queried
    IS, only the data of the truth IS is used; otherwise the data of every
    queried IS is added.

    Args:
        load_historical_data_from_pickle: if True the data is read from a
            pickle, otherwise points are drawn uniformly at random from the
            search domain and evaluated
        obj_func_min: the problem
        directoryToPickles: path to the directory that contains the pickle files
        list_IS_to_query: list of the IS that should be queried, e.g. [0, 1, 2]
        num_init_pts_each_IS: number of points per IS; used to build the
            default pickle filename or as the number of points to sample
        init_data_pickle_filename: optional name of the pickle to load; if
            empty, the name is derived from the function name, the queried
            IS and num_init_pts_each_IS

    Returns: HistoricalData object
    '''
    hist = HistoricalData(obj_func_min._dim)

    if load_historical_data_from_pickle:
        pickle_name = init_data_pickle_filename
        if pickle_name == '':
            # default name: <func>_IS_<is0>_<is1>..._<n>_points_each
            pickle_name = ''.join([
                obj_func_min.getFuncName(), '_', 'IS_',
                '_'.join(str(element) for element in list_IS_to_query), '_',
                str(num_init_pts_each_IS), "_points_each"
            ])

        pts_per_IS, vals_per_IS = load_data_from_a_min_problem(
            directoryToPickles, pickle_name)

        truth_IS = obj_func_min.getTruthIS()
        if truth_IS in list_IS_to_query:
            # the truth IS was sampled: use its data only
            slots = [(list_IS_to_query.index(truth_IS), truth_IS)]
        else:
            # use the data of all IS, in the order they were queried
            slots = list(enumerate(list_IS_to_query))

        for slot, which_IS in slots:
            noise_vars = [
                obj_func_min.noise_and_cost_func(which_IS, pt)[0]
                for pt in pts_per_IS[slot]
            ]
            hist.append_historical_data(pts_per_IS[slot], vals_per_IS[slot],
                                        noise_vars)
        return hist

    # No pickle: query random points for each IS.
    for which_IS in list_IS_to_query:
        truth_IS = obj_func_min.getTruthIS()
        if truth_IS in list_IS_to_query and which_IS != truth_IS:
            # the truthIS is observed but this is another IS: skip!
            continue

        domain = pythonTensorProductDomain([
            ClosedInterval(bound[0], bound[1])
            for bound in obj_func_min._search_domain
        ])
        random_pts = domain.generate_uniform_random_points_in_domain(
            num_init_pts_each_IS)
        observed = [obj_func_min.evaluate(which_IS, pt) for pt in random_pts]
        noise_vars = [
            obj_func_min.noise_and_cost_func(which_IS, pt)[0]
            for pt in random_pts
        ]
        hist.append_historical_data(random_pts, observed, noise_vars)

    return hist
def construct_hist_data_from_s3(bucket,
                                dim,
                                IS_key_dict,
                                combine_IS,
                                sign,
                                take_diff=False,
                                primary_IS=None):
    """Build HistoricalData from per-IS data sets stored in an S3 bucket.

    Each data set is fetched with ``get_data_from_s3`` and must provide the
    entries ``'points'``, ``'vals'`` and ``'noise'``.

    :param bucket: amazon s3 bucket object
    :param dim: space dimension of the problem
    :type dim: int
    :param IS_key_dict: {IS: key} hashtable which provides the key of the data for the corresponding IS
    :type IS_key_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the space (IS x space), with the IS
        stored as an additional first coordinate, or a dict with one HistoricalData object per IS
    :type combine_IS: bool
    :param sign: factor every loaded value is multiplied with (documented convention: 1.0 for a
        minimization problem, otherwise maximization)
    :type sign: float
    :param take_diff: if True, the values of every non-primary IS are replaced by their difference to the
        values of primary_IS and the noise of primary_IS is added to theirs; the points of both must be
        identical. This is used by one approach of estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_IS: the primary IS; only used if take_diff = True
    :type primary_IS: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_by_IS = {}
    vals_by_IS = {}
    noise_by_IS = {}

    def _record(source, raw, relative_to_primary=False):
        # Store one data set, optionally expressed relative to the primary IS.
        pts = np.array(raw['points'])
        values = sign * np.array(raw['vals'])
        noise = np.array(raw['noise'])
        if relative_to_primary:
            values = values - vals_by_IS[primary_IS]
            noise = noise + noise_by_IS[primary_IS]
        points_by_IS[source] = pts
        vals_by_IS[source] = values
        noise_by_IS[source] = noise

    if take_diff:
        # The primary IS is needed first: all differences refer to it.
        _record(primary_IS, get_data_from_s3(bucket, IS_key_dict[primary_IS]))

    for IS in IS_key_dict:
        if not take_diff:
            _record(IS, get_data_from_s3(bucket, IS_key_dict[IS]))
            continue
        if IS == primary_IS:
            continue  # already stored above
        raw = get_data_from_s3(bucket, IS_key_dict[IS])
        assert np.array_equal(
            raw['points'],
            points_by_IS[primary_IS]), "inconsistent points, cannot take diff!"
        _record(IS, raw, relative_to_primary=True)

    if not combine_IS:
        per_IS = {}
        for IS in points_by_IS:
            per_IS[IS] = HistoricalData(dim=dim)
            per_IS[IS].append_historical_data(points_by_IS[IS],
                                              vals_by_IS[IS], noise_by_IS[IS])
        return per_IS

    combined = HistoricalData(dim=dim + 1)
    for IS in points_by_IS:
        # first column identifies the IS every point belongs to
        IS_column = IS * np.ones(len(vals_by_IS[IS])).reshape((-1, 1))
        combined.append_historical_data(
            np.hstack((IS_column, points_by_IS[IS])), vals_by_IS[IS],
            noise_by_IS[IS])
    return combined
return (0.001, 1000) if IS == 1 else (0.01, 1) obj_func_max = Rosenbrock(num_IS=2, noise_and_cost_func=noise_and_cost_func, mult=-1.0) num_discretization = 5000 num_init_pts_all_IS = [5, 5] num_multistart = 50 hyper_param = pandas.read_sql_table('multifidelity_kg_hyperparam_' + func_name, sql_util.sql_engine).mean(axis=0).values search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in obj_func_max._search_domain ]) ### Gen initial points data = HistoricalData(obj_func_max._dim + 1) for i in range(obj_func_max._num_IS): pts = search_domain.generate_uniform_random_points_in_domain( num_init_pts_all_IS[i]) vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts] IS_pts = numpy.hstack( ((i + 1) * numpy.ones(num_init_pts_all_IS[i]).reshape((-1, 1)), pts)) sample_vars = [ obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts ] data.append_historical_data(IS_pts, vals, sample_vars) cov_func = MixedSquareExponential(hyperparameters=hyper_param, total_dim=obj_func_max._dim + 1, num_is=obj_func_max._num_IS) gp = GaussianProcess(cov_func, data)
# Set-up for estimating hyperparameters of the assembleToOrder problem:
# random points are generated and evaluated at every information source (IS)
# and collected in one HistoricalData object.
func_name = 'assembleToOrder'
obj_func_max = AssembleToOrder(numIS=4)
num_pts_to_gen = 100  # numpy.repeat( 250, obj_func_max.getNumIS())
# One (lower, upper) bound per hyperparameter.
hyper_bounds = [
    (0.01, 100)
    for i in range((obj_func_max.getDim() + 1) * (obj_func_max.getNumIS() + 1))
]
num_hyper_multistart = 5
search_domain = pythonTensorProductDomain([
    ClosedInterval(bound[0], bound[1])
    for bound in obj_func_max.getSearchDomain()
])

### Gen points for hyperparam estimation
# One extra leading dimension stores the index of the IS.
data = HistoricalData(obj_func_max.getDim() + 1)  # should go into the objective func obj
for i in range(obj_func_max.getNumIS()):
    pts = search_domain.generate_uniform_random_points_in_domain(
        num_pts_to_gen)
    vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts]
    # Prepend a column holding the 1-based IS index.
    IS_pts = numpy.hstack(((i + 1) * numpy.ones(num_pts_to_gen).reshape(
        (-1, 1)), pts))
    sample_vars = [
        obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts
    ]
    data.append_historical_data(IS_pts, vals, sample_vars)

# hyperparam opt
# Single-argument call form: prints the same text under Python 2 and is also
# valid Python 3 (the former print statement was Python 2 only).
print("start hyperparam optimization...")
hyperparam_search_domain = pythonTensorProductDomain(
    [ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds])
def construct_hist_data_from_pickle(dim,
                                    directory,
                                    IS_filename_dict,
                                    combine_IS,
                                    sign,
                                    take_diff=False,
                                    primary_key=None):
    """Build HistoricalData from per-IS pickle files.

    Every file ``<directory>/<filename>.pickle`` must unpickle to an object
    providing the entries ``'points'``, ``'vals'`` and ``'noise'``.

    :param dim: space dimension of the problem
    :type dim: int
    :param directory: dir of the pickle files
    :type directory: str
    :param IS_filename_dict: {IS: filename} hashtable which provides the name of the pickle file for the
        corresponding IS
    :type IS_filename_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the space (IS x space), with the IS
        stored as an additional first coordinate, or a dict with one HistoricalData object per IS
    :type combine_IS: bool
    :param sign: factor every loaded value is multiplied with (documented convention: 1.0 for a
        minimization problem, otherwise maximization)
    :type sign: float
    :param take_diff: if True, the values of every non-primary IS are replaced by their difference to the
        values of the primary IS and the noise of the primary IS is added to theirs; the points of both
        must be identical. This is used by one approach of estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_key: the primary IS; only used if take_diff = True
    :type primary_key: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_by_key = {}
    vals_by_key = {}
    noise_by_key = {}

    def _unpickle(source):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # trusted files.
        with open(
                "{0}/{1}.pickle".format(directory, IS_filename_dict[source]),
                "rb") as handle:
            return pickle.load(handle)

    def _record(source, raw, relative_to_primary=False):
        # Store one data set, optionally expressed relative to the primary IS.
        pts = np.array(raw['points'])
        values = sign * np.array(raw['vals'])
        noise = np.array(raw['noise'])
        if relative_to_primary:
            values = values - vals_by_key[primary_key]
            noise = noise + noise_by_key[primary_key]
        points_by_key[source] = pts
        vals_by_key[source] = values
        noise_by_key[source] = noise

    if take_diff:
        # The primary IS is needed first: all differences refer to it.
        _record(primary_key, _unpickle(primary_key))

    for key in IS_filename_dict:
        if not take_diff:
            _record(key, _unpickle(key))
            continue
        if key == primary_key:
            continue  # already stored above
        raw = _unpickle(key)
        assert np.array_equal(
            raw['points'], points_by_key[primary_key]
        ), "inconsistent points, cannot take diff!"
        _record(key, raw, relative_to_primary=True)

    if not combine_IS:
        per_key = {}
        for key in points_by_key:
            per_key[key] = HistoricalData(dim=dim)
            per_key[key].append_historical_data(points_by_key[key],
                                                vals_by_key[key],
                                                noise_by_key[key])
        return per_key

    combined = HistoricalData(dim=dim + 1)
    for key in points_by_key:
        # first column identifies the IS every point belongs to
        key_column = key * np.ones(len(vals_by_key[key])).reshape((-1, 1))
        combined.append_historical_data(
            np.hstack((key_column, points_by_key[key])), vals_by_key[key],
            noise_by_key[key])
    return combined
func_name = 'assembleToOrder' obj_func_min = AssembleToOrder(numIS=4, mult=-1.0) hyper_bounds = [(0.01, 100) for i in range(obj_func_min.getDim() + 1)] num_hyper_multistart = 3 num_pts_to_gen = 250 search_domain = pythonTensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in obj_func_min.getSearchDomain() ]) cov = SquareExponential(numpy.ones(obj_func_min.getDim() + 1)) hyper_param = numpy.zeros((obj_func_min.getNumIS(), obj_func_min.getDim() + 1)) ### Gen points for hyperparam estimation for i in range(obj_func_min.getNumIS()): data = HistoricalData(obj_func_min.getDim()) pts = search_domain.generate_uniform_random_points_in_domain( num_pts_to_gen) vals = [obj_func_min.evaluate(i + 1, pt) for pt in pts] sample_vars = [ obj_func_min.noise_and_cost_func(i + 1, pt)[0] for pt in pts ] data.append_historical_data(pts, vals, sample_vars) # hyperparam opt hyperparam_search_domain = pythonTensorProductDomain( [ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds]) multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain( num_hyper_multistart) best_f = numpy.inf for k in range(num_hyper_multistart): hyper, f, output = hyper_opt(cov,
# Open the S3 bucket that is handed to identify_problem below.
conn = boto.connect_s3()
bucket = conn.get_bucket(s3_bucket_name, validate=True)
__author__ = 'jialeiwang'

# Build the problem instance from the command line arguments.
# format: run_pes.py ${benchmark_name} ${func_idx} ${repl_no}
argv = sys.argv[1:]
if "pes" not in argv[0]:
    raise ValueError("benchmark is not pes!")
problem = identify_problem(argv, bucket)

# Transform data to (0,1)^d space. The first coordinate of every point is
# copied unchanged; only the remaining coordinates are passed through
# scale_forward together with the bounds of the search domain.
lower_bounds = problem.obj_func_min._search_domain[:, 0]
upper_bounds = problem.obj_func_min._search_domain[:, 1]
transformed_data = HistoricalData(problem.obj_func_min.getDim() + 1)
for pt, val, var in zip(problem.hist_data.points_sampled,
                        problem.hist_data.points_sampled_value,
                        problem.hist_data.points_sampled_noise_variance):
    scaled_tail = scale_forward(pt[1:], lower_bounds, upper_bounds)
    transformed_pt = numpy.concatenate(([pt[0]], scaled_tail))
    transformed_data.append_sample_points([
        [transformed_pt, val, var],
    ])


# entropy search begins
def noise_func(IS, x):
    """Return the first entry of the problem's noise_and_cost_func(IS, x)."""
    noise_and_cost = problem.obj_func_min.noise_and_cost_func(IS, x)
    return noise_and_cost[0]
pre_mult=1.0, max_relative_change=0.7, tolerance=1.0e-3) cpp_sgd_params_ps = cppGradientDescentParameters(num_multistarts=1, max_num_steps=12, max_num_restarts=1, num_steps_averaged=3, gamma=0.7, pre_mult=0.01, max_relative_change=0.01, tolerance=1.0e-5) if obj_func_name == "GP": gp_grad_info_dict = pickle.load(open('random_gp_grad_1d', 'rb')) hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 1) hist_data_grad.append_historical_data(gp_grad_info_dict['points'], gp_grad_info_dict['values'], gp_grad_info_dict['vars']) objective_func = synthetic_functions.RandomGP( gp_grad_info_dict['dim'], gp_grad_info_dict['hyper_params'], hist_data_grad) hyper_params = gp_grad_info_dict['hyper_params'] init_pts = [[-1.5], [-1.0], [1.0], [1.5]] ymax = 2 elif obj_func_name == "GP_wavy": gp_grad_info_dict = pickle.load(open('random_gp_1d_wavy', 'rb')) hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 0) hist_data_grad.append_historical_data(gp_grad_info_dict['points'], gp_grad_info_dict['values'], gp_grad_info_dict['vars'])