Example #1
def createHistoricalDataForMisoKGDiff(dim_obj_func_min,
                                      listPrevData,
                                      directory,
                                      bias_filename,
                                      mult=-1.0):
    """ This data is only used to train mKG hyperparams, and suppose listPrevData[0] is unbiased IS
    :param dim_obj_func_min:
    :param listPrevData:
    :param directory:
    :param bias_filename:
    :return:
    """
    with open("{0}/{1}.pickle".format(directory, bias_filename),
              "rb") as input_file:
        bias_data = pickle.load(input_file)
    data_IS0 = HistoricalData(dim_obj_func_min)
    data_IS0.append_historical_data(listPrevData[0][0],
                                    mult * numpy.array(listPrevData[0][1]),
                                    numpy.array(listPrevData[0][2]))
    data_list = [data_IS0]
    for i in range(len(listPrevData) - 1):
        data = HistoricalData(dim_obj_func_min)
        data.append_historical_data(
            bias_data['points'][i][:200, :],
            mult * numpy.array(bias_data['vals'][i][:200]),
            numpy.ones(len(bias_data['vals'][i][:200])) *
            (numpy.mean(listPrevData[0][2]) +
             numpy.mean(listPrevData[i + 1][2])))
        data_list.append(data)
    return data_list
Example #2
def sample_intial_x_general(problem, num_initial_pts_per_s, points_x, exp_path,
                            result_path):
    list_init_pts_value_noise = []
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim())
    repQL = problem.obj_func_min.repQL
    s_min = problem.obj_func_min.getSearchDomain()[0, 0]
    s_max = problem.obj_func_min.getSearchDomain()[0, 1]
    for s in np.linspace(s_min, s_max, num=problem.obj_func_min.getNums()):
        random_seeds = np.random.randint(900, size=num_initial_pts_per_s)
        points = np.hstack((s * np.ones(num_initial_pts_per_s).reshape(
            (-1, 1)), points_x))

        vals_array, noise_array = np.zeros(num_initial_pts_per_s), np.zeros(
            num_initial_pts_per_s)
        i = -1
        for (pt, random_seed) in zip(points, random_seeds):
            i += 1
            value, noise_array[i] = problem.obj_func_min.evaluate(
                repQL, pt, random_seed, exp_path)
            vals_array[i] = -1.0 * value

        new_historical_data.append_historical_data(points, vals_array,
                                                   noise_array)

        pts_value_noise = np.hstack((points, vals_array.reshape(
            (-1, 1)), noise_array.reshape((-1, 1))))
        list_init_pts_value_noise.append(pts_value_noise)
        with open(result_path + '_initial_samples.txt', "w") as file:
            file.write(str(list_init_pts_value_noise))
        with open(result_path + '_initial_samples.pickle', "wb") as file:
            dump(np.array(list_init_pts_value_noise), file)
    # print(list_init_pts_value_noise)
    return new_historical_data
Example #3
def createHistoricalDataGeneral(dim_obj_func_min,
                                listPrevData,
                                mult,
                                indexFirstIS=0):
    '''
    Args:
        dim_obj_func_min: dim of the objective function, as given in obj_func_min._dim
        listPrevData: list of tuples (points, vals, noise)
        mult: multiplier applied to all values, e.g. -1.0 to turn minimization data into the maximization form used by misoKG
        indexFirstIS: number of the first IS in listPrevData; the others are numbered consecutively

    Returns: HistoricalData object for KG (with an additional first column that gives the IS each point corresponds to)
    '''
    data = HistoricalData(dim_obj_func_min + 1)
    indexIS = indexFirstIS  # this is the number that corresponds to the IS-dimension in the GP
    for dataset in listPrevData:
        # add first column that gives the IS the data corresponds to
        IS_pts = numpy.hstack((indexIS * numpy.ones(len(dataset[0])).reshape(
            (-1, 1)), dataset[0]))

        # multiply all values by -1 since we assume that the training data stems from the minimization version
        # but misoKG uses the maximization version
        vals = mult * numpy.array(dataset[1])
        data.append_historical_data(IS_pts, vals, dataset[2])
        indexIS += 1
    return data
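A minimal usage sketch (assuming numpy and MOE's HistoricalData are imported as in the snippet above; the points, values and noise below are made up for illustration):

# Hypothetical data: two 2-d points observed at each of two information sources.
pts_IS0 = numpy.array([[0.3, 0.7], [0.1, 0.9]])
pts_IS1 = numpy.array([[0.5, 0.5], [0.2, 0.8]])
listPrevData = [
    (pts_IS0, numpy.array([1.2, -0.4]), numpy.array([0.01, 0.01])),  # (points, vals, noise) for IS 0
    (pts_IS1, numpy.array([0.9, -0.1]), numpy.array([0.02, 0.02])),  # (points, vals, noise) for IS 1
]
# mult=-1.0 flips the minimization values into the maximization form used by misoKG
hist = createHistoricalDataGeneral(dim_obj_func_min=2, listPrevData=listPrevData, mult=-1.0)
# hist.points_sampled now has 3 columns: the IS index (0 or 1) followed by the 2 space dimensions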
Example #4
def sample_initial_data(problem, num_initial_pts_per_IS):
    points = problem.obj_func_min.get_moe_domain().generate_uniform_random_points_in_domain(num_initial_pts_per_IS)
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim() + 1)  # increased by one for index of IS
    for IS in problem.obj_func_min.getList_IS_to_query():
        points_dict[IS] = np.hstack((IS * np.ones(num_initial_pts_per_IS).reshape((-1, 1)), points))
        vals_dict[IS] = np.array([-1.0 * problem.obj_func_min.evaluate(IS, pt) for pt in points])
        noise_dict[IS] = np.ones(len(points)) * problem.obj_func_min.noise_and_cost_func(IS, None)[0]
        # note: misoKG will learn the noise from sampled data
        new_historical_data.append_historical_data(points_dict[IS], vals_dict[IS], noise_dict[IS])
    return new_historical_data
Example #5
def load_sample_data(problem, num_per_var, exp_path, result_path):
    var_dim = int(problem.obj_func_min.getDim()) - 1
    num_initial_pts_per_s = int(num_per_var * var_dim)
    with open(result_path + '_initial_samples.pickle', 'rb') as file:
        list_init_pts_value_noise = pickle.load(file)
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim())
    count = -1
    repQL = problem.obj_func_min.repQL
    s_min = problem.obj_func_min.getSearchDomain()[0, 0]
    s_max = problem.obj_func_min.getSearchDomain()[0, 1]
    for s in np.linspace(s_min, s_max, num=problem.obj_func_min.getNums()):
        count += 1
        pts_value_noise = list_init_pts_value_noise[count]
        points = pts_value_noise[:, 0:-2]
        vals_array = pts_value_noise[:, -2]
        noise_array = pts_value_noise[:, -1]
        new_historical_data.append_historical_data(points, vals_array,
                                                   noise_array)

    return new_historical_data
Example #6
def get_random_gp_data(space_dim, num_is, num_data_each_is, kernel_name):
    """ Generate random gp data
    :param space_dim:
    :param num_is:
    :param num_data_each_is:
    :param kernel_name: currently it's either 'mix_exp' or 'prod_ker'
    :return:
    """
    sample_var = 0.01
    if kernel_name == "mix_exp":
        hyper_params = numpy.random.uniform(size=(num_is + 1) *
                                            (space_dim + 1))
        cov = MixedSquareExponential(hyper_params, space_dim + 1, num_is)
    elif kernel_name == "prod_ker":
        hyper_params = numpy.random.uniform(size=(num_is + 1) *
                                            (num_is + 2) // 2 + space_dim + 1)
        cov = ProductKernel(hyper_params, space_dim + 1, num_is + 1)
    else:
        raise NotImplementedError("invalid kernel")
    python_search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in numpy.repeat([[-10., 10.]], space_dim + 1, axis=0)
    ])
    data = HistoricalData(space_dim + 1)
    init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
    init_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(init_pts, numpy.zeros(2),
                                numpy.ones(2) * sample_var)
    gp = GaussianProcess(cov, data)
    points = python_search_domain.generate_uniform_random_points_in_domain(
        num_data_each_is)
    for pt in points:
        for i in range(num_is):
            pt[0] = i
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([
                [pt, val, sample_var],
            ])
            gp = GaussianProcess(cov, data)
    return hyper_params, data
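For instance, a small sketch (it assumes the MOE classes imported by the module above, such as MixedSquareExponential and GaussianProcess, are available; the sizes are arbitrary):

# Draw a toy multi-IS dataset from a random GP with the mixed squared-exponential kernel.
hyper_params, data = get_random_gp_data(space_dim=2, num_is=2,
                                         num_data_each_is=5,
                                         kernel_name="mix_exp")
# 2 seed points plus 5 points sampled at each of the 2 information sources
print(len(data.points_sampled))  # expected: 12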
Example #7
def createHistoricalDataForMisoEI(dim_obj_func_min, listPrevData, directory,
                                  bias_filename):
    """ Note: since misoEI uses notion of fidelity variance, I set it to noise_var + bias^2, where bias is estimated
    from biasData
    :param dim_obj_func_min:
    :param listPrevData:
    :return:
    """
    with open("{0}/{1}.pickle".format(directory, bias_filename),
              "rb") as input_file:
        bias_data = pickle.load(input_file)
    bias_sq_list = numpy.power(
        numpy.concatenate(([0.], [
            numpy.mean(bias_data['vals'][i])
            for i in range(len(listPrevData) - 1)
        ])), 2.0)
    data_list = []
    for i, dataset in enumerate(listPrevData):
        data = HistoricalData(dim_obj_func_min)
        data.append_historical_data(dataset[0], dataset[1],
                                    numpy.array(dataset[2]) + bias_sq_list[i])
        data_list.append(data)
    return data_list, bias_sq_list
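A hedged sketch of the expected pickle layout and call (the directory, file name and numbers are placeholders; pickle, numpy and HistoricalData are assumed to be imported as above):

# The bias pickle stores, per secondary IS, the points and the observed bias values.
bias_data = {"points": [numpy.random.uniform(size=(50, 2))],
             "vals": [0.1 * numpy.random.randn(50)]}
with open("{0}/{1}.pickle".format(".", "bias_demo"), "wb") as f:
    pickle.dump(bias_data, f)

# listPrevData holds one (points, vals, noise) tuple per IS; the first entry is the unbiased IS.
listPrevData = [
    (numpy.random.uniform(size=(10, 2)), numpy.random.randn(10), 0.01 * numpy.ones(10)),
    (numpy.random.uniform(size=(10, 2)), numpy.random.randn(10), 0.02 * numpy.ones(10)),
]
data_list, bias_sq_list = createHistoricalDataForMisoEI(
    dim_obj_func_min=2, listPrevData=listPrevData,
    directory=".", bias_filename="bias_demo")
# bias_sq_list[0] is 0.0 for the unbiased IS; bias_sq_list[1] is the squared mean bias of IS 1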
Example #8
    def generate_data(self, num_data):
        python_search_domain = pythonTensorProductDomain([
            ClosedInterval(bound[0], bound[1])
            for bound in self._info_dict['search_domain']
        ])
        data = HistoricalData(self._info_dict['dim'])
        init_pts = python_search_domain.generate_uniform_random_points_in_domain(
            2)
        init_pts[:, 0] = numpy.zeros(2)
        data.append_historical_data(init_pts, numpy.zeros(2),
                                    numpy.ones(2) * self._sample_var_1)
        gp = GaussianProcess(self._cov, data)
        points = python_search_domain.generate_uniform_random_points_in_domain(
            num_data)
        for pt in points:
            pt[0] = numpy.ceil(numpy.random.uniform(high=2.0, size=1))
            sample_var = self._sample_var_1 if pt[0] == 1 else self._sample_var_2
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([
                [pt, val, sample_var],
            ])
            gp = GaussianProcess(self._cov, data)
        return data
    def test(self):

        rb = RosenbrockVanilla()
        func_name = rb.getFuncName()

        pathToPickles = 'picklesTest'

        ### Test load_data_from_a_min_problem()
        name_testfile = 'load_and_store_Test'
        samples = numpy.array([[[1, 1], [1, 2]]])
        #print samples
        values = [[1.0, 2.0]]
        data = {"points": samples, "vals": values}

        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)
        loaded_pts, loaded_vals = load_data_from_a_min_problem(
            pathToPickles, name_testfile)
        for index in range(len(samples)):
            self.assertTrue((samples[index] == loaded_pts[index]).all())
        for index in range(len(values)):
            self.assertTrue((values[index] == loaded_vals[index]))

        # test overwriting
        samples = numpy.array([[[1, 4], [1, 2]]])
        data = {"points": samples, "vals": values}
        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)
        loaded_pts, loaded_vals = load_data_from_a_min_problem(
            pathToPickles, name_testfile)
        for index in range(len(samples)):
            self.assertTrue((samples[index] == loaded_pts[index]).all())
        for index in range(len(values)):
            self.assertTrue((values[index] == loaded_vals[index]))

        ### Test obtainHistoricalDataForEGO()

        #TODO come up with tests for these  functions

        list_IS_to_query = [0]
        num_init_pts_each_IS = 10

        name_testfile = rb.getFuncName() + '_' + 'IS_' + '_'.join(
            str(element) for element in list_IS_to_query) + '_' + str(
                num_init_pts_each_IS) + "_points_each"

        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)

        # testHistoricalData = obtainHistoricalDataForEGO(True, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
        # print testHistoricalData
        #
        # testHistoricalDataRandom = obtainHistoricalDataForEGO(False, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
        # print testHistoricalDataRandom

        ### Test createHistoricalDataForKG()
        listPrevData = []

        samples = [[1, 1], [1, 2]]
        values = [1.0, 2.0]
        list_noise_variance_at_sample = [0.1, 0.3]
        listPrevData.append((samples, values, list_noise_variance_at_sample))

        hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
        #print hist_kg
        IS_samples = [[0, 1, 1], [0, 1, 2]]
        for index in range(len(hist_kg.points_sampled)):
            self.assertTrue(
                (IS_samples[index] == hist_kg.points_sampled[index]).all())
        for index in range(len(hist_kg.points_sampled_value)):
            self.assertAlmostEqual(values[index],
                                   -1.0 * hist_kg.points_sampled_value[index],
                                   delta=0.0001)

        samples = [[0, 0], [4, 3]]
        for index in range(len(hist_kg.points_sampled)):
            self.assertTrue(
                (IS_samples[index] == hist_kg.points_sampled[index]).all())

        listPrevData = [(samples, values, list_noise_variance_at_sample)]
        bestpt, bestval, best_truth = findBestSampledValue(rb, listPrevData, 0)
        # print findBestSampledValue(rb, listPrevData, 0)
        self.assertAlmostEqual(bestval, 1.0, delta=0.0001)
        self.assertAlmostEqual(best_truth, numpy.float64(-9.0), delta=1.0)
        self.assertTrue((bestpt == [0.0, 0.0]))

        list_sampled_IS = [0, 0]
        gathered_data_from_all_replications = []
        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })

        for indexList in range(len(gathered_data_from_all_replications)):
            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['vals'])):
                self.assertAlmostEqual(
                    values[indexElem],
                    gathered_data_from_all_replications[indexList]['vals']
                    [indexElem],
                    delta=0.0001)

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['points'])):
                self.assertTrue(samples[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['points'][indexElem])

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['sampledIS'])):
                self.assertTrue(list_sampled_IS[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['sampledIS'][indexElem])

        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })
        for indexList in range(len(gathered_data_from_all_replications)):
            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['vals'])):
                self.assertAlmostEqual(
                    values[indexElem],
                    gathered_data_from_all_replications[indexList]['vals']
                    [indexElem],
                    delta=0.0001)

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['points'])):
                self.assertTrue(samples[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['points'][indexElem])

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['sampledIS'])):
                self.assertTrue(list_sampled_IS[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['sampledIS'][indexElem])

        samples = [[-1., 0], [0.1, -2.0]]
        values = [0.2, 1.5]
        list_sampled_IS = [3, 3]
        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })
        for indexElem in range(
                len(gathered_data_from_all_replications[2]['vals'])):
            self.assertAlmostEqual(
                values[indexElem],
                gathered_data_from_all_replications[2]['vals'][indexElem],
                delta=0.0001)

        for indexElem in range(
                len(gathered_data_from_all_replications[2]['points'])):
            self.assertTrue(
                samples[indexElem] == gathered_data_from_all_replications[2]
                ['points'][indexElem])

        for indexElem in range(
                len(gathered_data_from_all_replications[2]['sampledIS'])):
            self.assertTrue(
                list_sampled_IS[indexElem] ==
                gathered_data_from_all_replications[2]['sampledIS'][indexElem])

        listPrevData.append(
            (gathered_data_from_all_replications[2]['points'],
             gathered_data_from_all_replications[2]['vals'],
             gathered_data_from_all_replications[2]['noise_variance']))

        hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
        #print hist_kg
        self.assertTrue((hist_kg.points_sampled[0] == [0, 0, 0]).all())
        self.assertTrue((hist_kg.points_sampled[1] == [0, 4, 3]).all())
        self.assertTrue((hist_kg.points_sampled[2] == [1, -1.0, 0]).all())
        self.assertTrue((hist_kg.points_sampled[3] == [1, .1, -2]).all())

        self.assertAlmostEqual(values[0],
                               -1.0 * hist_kg.points_sampled_value[2],
                               delta=0.0001)
        self.assertAlmostEqual(values[1],
                               -1.0 * hist_kg.points_sampled_value[3],
                               delta=0.0001)

        self.assertAlmostEqual(list_noise_variance_at_sample[0],
                               hist_kg.points_sampled_noise_variance[2],
                               delta=0.0001)
        self.assertAlmostEqual(list_noise_variance_at_sample[1],
                               hist_kg.points_sampled_noise_variance[3],
                               delta=0.0001)

        ### Test for findBestSampledValueFromHistoricalData()
        atoext = AssembleToOrderExtended(mult=-1.0)
        hd = HistoricalData(atoext.getDim())
        pts = numpy.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                           [1.0, 0.2, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4]])
        self.assertTrue(len(pts) == 2)
        self.assertTrue(len(pts[0]) == atoext.getDim())
        self.assertTrue(len(pts[1]) == atoext.getDim())
        vals = [-1.0, 0.2]
        noises = [0.1, 0.2]
        hd.append_historical_data(pts, vals, noises)
        # print hd.to_list_of_sample_points()

        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        # print bestpt
        # print best_val
        # print best_truth
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -1.0)
        self.assertAlmostEqual(best_truth,
                               atoext.evaluate(2, bestpt),
                               delta=10.0)

        pts = numpy.array([[1.3, 1.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                           [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
        vals = [-11.0, 10.2]
        noises = [10.1, 1000.2]
        hd.append_historical_data(pts, vals, noises)
        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -11.0)

        pts2 = numpy.array([[10.3, 10.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                            [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
        vals = [11.0, 10.2]
        hd.append_historical_data(pts, vals, noises)
        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -11.0)
    'multifidelity_kg_hyperparam_' + func_name,
    sql_util.sql_engine).mean(axis=0).values
kg_data = HistoricalData(obj_func_max._dim + 1)
best_sampled_val = numpy.inf
for i in range(obj_func_max._num_IS):
    IS_pts = numpy.hstack(((i + 1) * numpy.ones(len(init_pts[i])).reshape(
        (-1, 1)), init_pts[i]))

    # multiply all values by -1 since we assume that the training data stems from the minimization version
    # but misoKG uses the maximization version
    vals = -1.0 * numpy.array(init_vals[i])

    # obtain what used to be sample_vars
    noise_vars = numpy.array(
        [noise_and_cost_func(i + 1, pt)[0] for pt in init_pts[i]])
    kg_data.append_historical_data(IS_pts, vals, noise_vars)

    # find the best initial value
    if numpy.amin(init_vals[i]) < best_sampled_val:
        best_sampled_val = numpy.amin(init_vals[i])
        best_sampled_point = init_pts[i][numpy.argmin(init_vals[i]), :]
truth_at_best_sampled = obj_func_min.evaluate(truth_IS, best_sampled_point)

kg_cov = MixedSquareExponential(hyperparameters=kg_hyper_param,
                                total_dim=obj_func_max._dim + 1,
                                num_is=obj_func_max._num_IS)
kg_cov_cpp = cppMixedSquareExponential(hyperparameters=kg_hyper_param)
kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, kg_data, obj_func_max._num_IS)
for kg_n in range(num_iterations):
    print "itr {0}, {1}".format(kg_n, benchmark_result_table_name)
    ### First discretize points and then only keep the good points idea
Example #11
        ### Format: IS 0: signal variance and length scales, IS 1: signal variance and length scales, etc.
        ###  Then observational noise for IS 0, IS 1 etc.

        hyperparameters_noise = numpy.power(best_hyper[-num_IS:], 2.0)
        hypers_GP = best_hyper[:-num_IS]

        # update noise in historical data
        updated_points_sampled_noise_variance = create_array_points_sampled_noise_variance(
            current_hist_data.points_sampled, hyperparameters_noise)

        # create new Historical data object with updated values
        new_historical_data = HistoricalData(
            dim=problem.obj_func_min.getDim() + 1)  # increased by one for the IS index
        new_historical_data.append_historical_data(
            current_hist_data.points_sampled,
            current_hist_data.points_sampled_value,
            updated_points_sampled_noise_variance)

        # Use new hyperparameters -- this requires instantiating a new GP object
        kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hypers_GP)
        kg_gp_cpp = GaussianProcessNew(kg_cov_cpp,
                                       new_historical_data,
                                       num_IS_in=problem.num_is_in)
        # kg_cov_cpp is not used afterwards

    ### Find IS and point that maximize KG/cost
    discretization_points = problem.obj_func_min.get_moe_domain(
    ).generate_uniform_random_points_in_domain(
        num_discretization_before_ranking)
    discretization_points = np.hstack((np.zeros(
        (num_discretization_before_ranking, 1)), discretization_points))
Example #12
                                    sql_util.sql_engine).mean(axis=0).values
search_domain = pythonTensorProductDomain([
    ClosedInterval(bound[0], bound[1]) for bound in obj_func_max._search_domain
])
### Gen initial points
data = HistoricalData(obj_func_max._dim + 1)
for i in range(obj_func_max._num_IS):
    pts = search_domain.generate_uniform_random_points_in_domain(
        num_init_pts_all_IS[i])
    vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts]
    IS_pts = numpy.hstack(
        ((i + 1) * numpy.ones(num_init_pts_all_IS[i]).reshape((-1, 1)), pts))
    sample_vars = [
        obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts
    ]
    data.append_historical_data(IS_pts, vals, sample_vars)
cov_func = MixedSquareExponential(hyperparameters=hyper_param,
                                  total_dim=obj_func_max._dim + 1,
                                  num_is=obj_func_max._num_IS)
gp = GaussianProcess(cov_func, data)

# print "start max mu"
# num_randomization = 100000
# random_pts = search_domain.generate_uniform_random_points_in_domain(num_randomization)
# zero_random_pts = numpy.hstack((numpy.zeros((num_randomization, 1)), random_pts))
# print "random pts generated"
# mu_list = gp.compute_mean_of_points(zero_random_pts)
# print "compute mean completed"
# best_mu = numpy.amax(mu_list)
# best_pt = random_pts[numpy.argmax(mu_list), :]
Example #13
                                              tolerance=1.0e-3)

cpp_sgd_params_ps = cppGradientDescentParameters(num_multistarts=1,
                                                 max_num_steps=12,
                                                 max_num_restarts=1,
                                                 num_steps_averaged=3,
                                                 gamma=0.7,
                                                 pre_mult=0.01,
                                                 max_relative_change=0.01,
                                                 tolerance=1.0e-5)

if obj_func_name == "GP":
    gp_grad_info_dict = pickle.load(open('random_gp_grad_1d', 'rb'))
    hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 1)
    hist_data_grad.append_historical_data(gp_grad_info_dict['points'],
                                          gp_grad_info_dict['values'],
                                          gp_grad_info_dict['vars'])
    objective_func = synthetic_functions.RandomGP(
        gp_grad_info_dict['dim'], gp_grad_info_dict['hyper_params'],
        hist_data_grad)
    hyper_params = gp_grad_info_dict['hyper_params']
    init_pts = [[-1.5], [-1.0], [1.0], [1.5]]
    ymax = 2
elif obj_func_name == "GP_wavy":
    gp_grad_info_dict = pickle.load(open('random_gp_1d_wavy', 'rb'))
    hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 0)
    hist_data_grad.append_historical_data(gp_grad_info_dict['points'],
                                          gp_grad_info_dict['values'],
                                          gp_grad_info_dict['vars'])
    print(gp_grad_info_dict['values'])
    objective_func = synthetic_functions.RandomGP(
Example #14
def construct_hist_data_from_pickle(dim,
                                    directory,
                                    IS_filename_dict,
                                    combine_IS,
                                    sign,
                                    take_diff=False,
                                    primary_key=None):
    """
    :param dim: space dimension of the problem
    :type dim: int
    :param directory: dir of the pickle files
    :type directory: str
    :param IS_filename_dict: {IS: filename} hashtable which provides the name of the pickle file for the corresponding IS
    :type IS_filename_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the space IS \times space, or a dict of
    HistoricalData objects, one per IS
    :type combine_IS: bool
    :param sign: sign = 1.0 for a minimization problem, otherwise it is a maximization problem
    :type sign: float
    :param take_diff: whether to take the difference between IS_i and the primary IS; this is used by one approach to
    estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_key: if take_diff = True, this specifies the primary IS
    :type primary_key: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    if take_diff:
        with open(
                "{0}/{1}.pickle".format(directory,
                                        IS_filename_dict[primary_key]),
                "rb") as f:
            data = pickle.load(f)
            points_dict[primary_key] = np.array(data['points'])
            vals_dict[primary_key] = sign * np.array(data['vals'])
            noise_dict[primary_key] = np.array(data['noise'])
    for key in IS_filename_dict:
        if take_diff and key != primary_key:
            with open(
                    "{0}/{1}.pickle".format(directory, IS_filename_dict[key]),
                    "rb") as f:
                data = pickle.load(f)
                assert np.array_equal(
                    data['points'], points_dict[primary_key]
                ), "inconsistent points, cannot take diff!"
                points_dict[key] = np.array(data['points'])
                vals_dict[key] = sign * np.array(
                    data['vals']) - vals_dict[primary_key]
                noise_dict[key] = np.array(
                    data['noise']) + noise_dict[primary_key]
        elif not take_diff:
            with open(
                    "{0}/{1}.pickle".format(directory, IS_filename_dict[key]),
                    "rb") as f:
                data = pickle.load(f)
                points_dict[key] = np.array(data['points'])
                vals_dict[key] = sign * np.array(data['vals'])
                noise_dict[key] = np.array(data['noise'])
    if combine_IS:
        to_return = HistoricalData(dim=dim + 1)
        for key in points_dict:
            num_data = len(vals_dict[key])
            to_return.append_historical_data(
                np.hstack((key * np.ones(num_data).reshape(
                    (-1, 1)), points_dict[key])), vals_dict[key],
                noise_dict[key])
    else:
        to_return = {}
        for key in points_dict:
            to_return[key] = HistoricalData(dim=dim)
            to_return[key].append_historical_data(points_dict[key],
                                                  vals_dict[key],
                                                  noise_dict[key])
    return to_return
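A usage sketch (file names and values are invented; the pickle layout with 'points', 'vals' and 'noise' keys follows what the function reads):

import pickle
import numpy as np

# Write two tiny pickles in the expected layout.
for fname in ["demo_is0", "demo_is1"]:
    payload = {"points": np.random.uniform(size=(5, 2)),
               "vals": np.random.uniform(size=5),
               "noise": 0.01 * np.ones(5)}
    with open("{0}/{1}.pickle".format(".", fname), "wb") as f:
        pickle.dump(payload, f)

# combine_IS=True returns one HistoricalData whose first column is the IS index;
# sign multiplies all stored values (e.g. -1.0 to flip the sign of the pickled values).
hist = construct_hist_data_from_pickle(dim=2, directory=".",
                                       IS_filename_dict={0: "demo_is0", 1: "demo_is1"},
                                       combine_IS=True, sign=-1.0)
# hist.points_sampled has shape (10, 3): IS index followed by the two space dimensions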
Example #15
def construct_hist_data_from_s3(bucket,
                                dim,
                                IS_key_dict,
                                combine_IS,
                                sign,
                                take_diff=False,
                                primary_IS=None):
    """
    :param bucket: amazon s3 bucket object
    :param dim: space dimension of the problem
    :type dim: int
    :param IS_key_dict: {IS: key} hashtable which provides the key of the data for the corresponding IS
    :type IS_key_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the space IS \times space, or a dict of
    HistoricalData objects, one per IS
    :type combine_IS: bool
    :param sign: sign = 1.0 for a minimization problem, otherwise it is a maximization problem
    :type sign: float
    :param take_diff: whether to take the difference between IS_i and the primary IS; this is used by one approach to
    estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_IS: if take_diff = True, this specifies the primary IS
    :type primary_IS: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    if take_diff:
        data = get_data_from_s3(bucket, IS_key_dict[primary_IS])
        points_dict[primary_IS] = np.array(data['points'])
        vals_dict[primary_IS] = sign * np.array(data['vals'])
        noise_dict[primary_IS] = np.array(data['noise'])
    for IS in IS_key_dict:
        if take_diff and IS != primary_IS:
            data = get_data_from_s3(bucket, IS_key_dict[IS])
            assert np.array_equal(data['points'], points_dict[primary_IS]
                                  ), "inconsistent points, cannot take diff!"
            points_dict[IS] = np.array(data['points'])
            vals_dict[IS] = sign * np.array(
                data['vals']) - vals_dict[primary_IS]
            noise_dict[IS] = np.array(data['noise']) + noise_dict[primary_IS]
        elif not take_diff:
            data = get_data_from_s3(bucket, IS_key_dict[IS])
            points_dict[IS] = np.array(data['points'])
            vals_dict[IS] = sign * np.array(data['vals'])
            noise_dict[IS] = np.array(data['noise'])
    if combine_IS:
        to_return = HistoricalData(dim=dim + 1)
        for IS in points_dict:
            num_data = len(vals_dict[IS])
            to_return.append_historical_data(
                np.hstack((IS * np.ones(num_data).reshape(
                    (-1, 1)), points_dict[IS])), vals_dict[IS], noise_dict[IS])
    else:
        to_return = {}
        for IS in points_dict:
            to_return[IS] = HistoricalData(dim=dim)
            to_return[IS].append_historical_data(points_dict[IS],
                                                 vals_dict[IS], noise_dict[IS])
    return to_return
Example #16
def obtainHistoricalDataForEGO(load_historical_data_from_pickle,
                               obj_func_min,
                               directoryToPickles,
                               list_IS_to_query,
                               num_init_pts_each_IS,
                               init_data_pickle_filename=''):
    '''
    Create Historical Data object for EGO that contains initial data.
    If the truth IS is among the queried IS, then only its data is loaded.
    Args:
        load_historical_data_from_pickle: if True load from pickle otherwise do a random Latin hypercube design
        obj_func_min: the problem
        directoryToPickles: path to the directory that contains the pickle files
        list_IS_to_query: list of the IS that should be queried, e.g. [0, 1, 2]
        num_init_pts_each_IS: how many points for each IS; used either to find the right pickle or to determine the number of points to sample
        init_data_pickle_filename: optional parameter that gives the filename of the pickle to load

    Returns: HistoricalData object

    '''
    historical_data = HistoricalData(obj_func_min._dim)
    if (load_historical_data_from_pickle):
        # To load the pickled data, do:
        if (init_data_pickle_filename == ''):
            init_data_pickle_filename = obj_func_min.getFuncName() + '_' + 'IS_' \
                                        + '_'.join(str(element) for element in list_IS_to_query) + '_' \
                                        + str(num_init_pts_each_IS) + "_points_each"
        init_pts_array, init_vals_array = load_data_from_a_min_problem(
            directoryToPickles, init_data_pickle_filename)

        # if truthIS is among the sampled, then load only that one:
        if obj_func_min.getTruthIS() in list_IS_to_query:
            indexArray = list_IS_to_query.index(obj_func_min.getTruthIS())
            sample_vars = [
                obj_func_min.noise_and_cost_func(obj_func_min.getTruthIS(),
                                                 pt)[0]
                for pt in init_pts_array[indexArray]
            ]
            historical_data.append_historical_data(init_pts_array[indexArray],
                                                   init_vals_array[indexArray],
                                                   sample_vars)
        else:
            # load data for all IS
            indexArray = 0
            for index_IS in list_IS_to_query:
                sample_vars = [
                    obj_func_min.noise_and_cost_func(index_IS, pt)[0]
                    for pt in init_pts_array[indexArray]
                ]
                historical_data.append_historical_data(
                    init_pts_array[indexArray], init_vals_array[indexArray],
                    sample_vars)
                indexArray += 1
    else:
        # generate initial data from querying random points for each IS
        for index_IS in list_IS_to_query:
            if (obj_func_min.getTruthIS() in list_IS_to_query) and (
                    index_IS != obj_func_min.getTruthIS()):
                continue  # the truthIS is observed but this is another IS: skip!

            search_domain = pythonTensorProductDomain([
                ClosedInterval(bound[0], bound[1])
                for bound in obj_func_min._search_domain
            ])
            pts = search_domain.generate_uniform_random_points_in_domain(
                num_init_pts_each_IS)
            vals = [obj_func_min.evaluate(index_IS, pt) for pt in pts]
            sample_vars = [
                obj_func_min.noise_and_cost_func(index_IS, pt)[0] for pt in pts
            ]
            historical_data.append_historical_data(pts, vals, sample_vars)

    return historical_data
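A sketch exercising the random-design branch with a made-up stand-in problem object (hypothetical; it only exposes the attributes the function actually touches, and it assumes the module-level imports used above, e.g. pythonTensorProductDomain and ClosedInterval, are available):

import numpy

class _ToyMinProblem(object):
    """Hypothetical stand-in exposing only what obtainHistoricalDataForEGO uses."""
    _dim = 2
    _search_domain = numpy.array([[0.0, 1.0], [0.0, 1.0]])

    def getFuncName(self):
        return "toy"

    def getTruthIS(self):
        return 0

    def noise_and_cost_func(self, IS, pt):
        return 0.01, 1.0  # (noise variance, cost)

    def evaluate(self, IS, pt):
        return float(numpy.sum(pt ** 2))

# Sample 5 random points from IS 0 instead of loading a pickle.
hist = obtainHistoricalDataForEGO(False, _ToyMinProblem(), directoryToPickles="",
                                  list_IS_to_query=[0], num_init_pts_each_IS=5)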