Example #1
def sample_intial_x_general(problem, num_initial_pts_per_s, points_x, exp_path,
                            result_path):
    list_init_pts_value_noise = []
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim())
    repQL = problem.obj_func_min.repQL
    s_min = problem.obj_func_min.getSearchDomain()[0, 0]
    s_max = problem.obj_func_min.getSearchDomain()[0, 1]
    for s in np.linspace(s_min, s_max, num=problem.obj_func_min.getNums()):
        random_seeds = np.random.randint(900, size=num_initial_pts_per_s)
        points = np.hstack((s * np.ones(num_initial_pts_per_s).reshape(
            (-1, 1)), points_x))

        vals_array, noise_array = np.zeros(num_initial_pts_per_s), np.zeros(
            num_initial_pts_per_s)
        for i, (pt, random_seed) in enumerate(zip(points, random_seeds)):
            value, noise_array[i] = problem.obj_func_min.evaluate(
                repQL, pt, random_seed, exp_path)
            vals_array[i] = -1.0 * value

        new_historical_data.append_historical_data(points, vals_array,
                                                   noise_array)

        pts_value_noise = np.hstack((points, vals_array.reshape(
            (-1, 1)), noise_array.reshape((-1, 1))))
        list_init_pts_value_noise.append(pts_value_noise)
        with open(result_path + '_initial_samples.txt', "w") as file:
            file.write(str(list_init_pts_value_noise))
        with open(result_path + '_initial_samples.pickle', "wb") as file:
            dump(np.array(list_init_pts_value_noise), file)
    # print(list_init_pts_value_noise)
    return new_historical_data
Example #2
def createHistoricalDataGeneral(dim_obj_func_min,
                                listPrevData,
                                mult,
                                indexFirstIS=0):
    '''

    Args:
        dim_obj_func_min: dimension of the objective function, as given in obj_func_min._dim
        listPrevData: list of tuples (data, vals, noise)
        indexFirstIS: the IS number assigned to the first entry of listPrevData; subsequent entries are numbered consecutively

    Returns: HistoricalData object for KG (with an additional first column giving the IS each point corresponds to)

    '''
    data = HistoricalData(dim_obj_func_min + 1)
    indexIS = indexFirstIS  # this is the number that corresponds to the IS-dimension in the GP
    for dataset in listPrevData:
        # add first column that gives the IS the data corresponds to
        IS_pts = numpy.hstack((indexIS * numpy.ones(len(dataset[0])).reshape(
            (-1, 1)), dataset[0]))

        # multiply all values by -1 since we assume that the training data stems from the minimization version
        # but misoKG uses the maximization version
        vals = mult * numpy.array(dataset[1])
        data.append_historical_data(IS_pts, vals, dataset[2])
        indexIS += 1
    return data
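As a usage sketch (not from the original source), the call below feeds two made-up (points, vals, noise) tuples into createHistoricalDataGeneral defined above; the data and dimensions are hypothetical.

import numpy

# Two hypothetical information sources (IS), each given as a (points, vals, noise) tuple.
prev_IS0 = (numpy.array([[0.1, 0.2], [0.3, 0.4]]), [1.5, 2.0], [0.01, 0.01])
prev_IS1 = (numpy.array([[0.5, 0.6]]), [0.7], [0.02])

# mult=-1.0 flips minimization data into the maximization convention used by misoKG.
data = createHistoricalDataGeneral(dim_obj_func_min=2,
                                   listPrevData=[prev_IS0, prev_IS1],
                                   mult=-1.0)
print(data.points_sampled)  # the first column holds the IS index: 0, 0, 1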
Example #3
def gp_mean_var(
        points_sampled,
        points_to_evaluate,
        rest_host=DEFAULT_HOST,
        rest_port=DEFAULT_PORT,
        testapp=None,
        **kwargs
):
    """Hit the rest endpoint for calculating the posterior mean and variance of a gaussian process, given points already sampled."""
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_MEAN_VAR_ROUTE_NAME]
    raw_payload = kwargs.copy()  # Any options can be set via the kwargs ('covariance_info' etc.)

    raw_payload['points_to_evaluate'] = points_to_evaluate

    # Sanitize input points
    points_sampled_clean = [SamplePoint._make(point) for point in points_sampled]
    historical_data = HistoricalData(
            len(points_to_evaluate[0]),  # The dim of the space
            sample_points=points_sampled_clean,
            )

    if 'gp_historical_info' not in raw_payload:
        raw_payload['gp_historical_info'] = historical_data.json_payload()

    if 'domain_info' not in raw_payload:
        raw_payload['domain_info'] = {'dim': len(points_to_evaluate[0])}

    json_payload = json.dumps(raw_payload)

    json_response = call_endpoint_with_payload(rest_host, rest_port, endpoint, json_payload, testapp)

    output = GpMeanVarResponse().deserialize(json_response)

    return output.get('mean'), output.get('var')
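A hypothetical invocation of the helper above, assuming a MOE REST server is reachable at the default host and port; each sampled point is a (coordinates, value, noise variance) triple, which is what SamplePoint._make expects.

points_sampled = [
    ([0.0, 0.0], 1.0, 0.01),
    ([0.5, 0.5], -0.3, 0.01),
]
points_to_evaluate = [[0.25, 0.25], [0.75, 0.75]]

# mean and var each contain one entry per point in points_to_evaluate
mean, var = gp_mean_var(points_sampled, points_to_evaluate)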
Example #4
def sample_initial_data(problem, num_initial_pts_per_IS):
    points = problem.obj_func_min.get_moe_domain().generate_uniform_random_points_in_domain(num_initial_pts_per_IS)
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim() + 1)  # increased by one for index of IS
    for IS in problem.obj_func_min.getList_IS_to_query():
        points_dict[IS] = np.hstack((IS * np.ones(num_initial_pts_per_IS).reshape((-1, 1)), points))
        vals_dict[IS] = np.array([-1.0 * problem.obj_func_min.evaluate(IS, pt) for pt in points])
        noise_dict[IS] = np.ones(len(points)) * problem.obj_func_min.noise_and_cost_func(IS, None)[0]
        # note: misoKG will learn the noise from sampled data
        new_historical_data.append_historical_data(points_dict[IS], vals_dict[IS], noise_dict[IS])
    return new_historical_data
Example #5
def createHistoricalDataForMisoKGDiff(dim_obj_func_min,
                                      listPrevData,
                                      directory,
                                      bias_filename,
                                      mult=-1.0):
    """ This data is only used to train mKG hyperparams, and suppose listPrevData[0] is unbiased IS
    :param dim_obj_func_min:
    :param listPrevData:
    :param directory:
    :param bias_filename:
    :return:
    """
    with open("{0}/{1}.pickle".format(directory, bias_filename),
              "rb") as input_file:
        bias_data = pickle.load(input_file)
    data_IS0 = HistoricalData(dim_obj_func_min)
    data_IS0.append_historical_data(listPrevData[0][0],
                                    mult * numpy.array(listPrevData[0][1]),
                                    numpy.array(listPrevData[0][2]))
    data_list = [data_IS0]
    for i in range(len(listPrevData) - 1):
        data = HistoricalData(dim_obj_func_min)
        data.append_historical_data(
            bias_data['points'][i][:200, :],
            mult * numpy.array(bias_data['vals'][i][:200]),
            numpy.ones(len(bias_data['vals'][i][:200])) *
            (numpy.mean(listPrevData[0][2]) +
             numpy.mean(listPrevData[i + 1][2])))
        data_list.append(data)
    return data_list
Example #6
    def test_1d_analytic_ei_edge_cases(self):
        """Test cases where analytic EI would attempt to compute 0/0 without variance lower bounds."""
        base_coord = numpy.array([0.5])
        point1 = SamplePoint(base_coord, -1.809342, 0)
        point2 = SamplePoint(base_coord * 2.0, -1.09342, 0)

        # First a symmetric case: only one historical point
        data = HistoricalData(base_coord.size, [point1])

        hyperparameters = numpy.array([0.2, 0.3])
        covariance = SquareExponential(hyperparameters)
        gaussian_process = GaussianProcess(covariance, data)

        point_to_sample = base_coord
        ei_eval = ExpectedImprovement(gaussian_process, point_to_sample)

        ei = ei_eval.compute_expected_improvement()
        grad_ei = ei_eval.compute_grad_expected_improvement()
        self.assert_scalar_within_relative(ei, 0.0, 1.0e-15)
        self.assert_vector_within_relative(grad_ei, numpy.zeros(grad_ei.shape), 1.0e-15)

        shifts = (1.0e-15, 4.0e-11, 3.14e-6, 8.89e-1, 2.71)
        self._check_ei_symmetry(ei_eval, point_to_sample, shifts)

        # Now introduce some asymmetry with a second point
        # Right side has a larger objective value, so the EI minimum
        # is shifted *slightly* to the left of best_so_far.
        gaussian_process.add_sampled_points([point2])
        shift = 3.0e-12
        ei_eval = ExpectedImprovement(gaussian_process, point_to_sample - shift)
        ei = ei_eval.compute_expected_improvement()
        grad_ei = ei_eval.compute_grad_expected_improvement()
        self.assert_scalar_within_relative(ei, 0.0, 1.0e-15)
        self.assert_vector_within_relative(grad_ei, numpy.zeros(grad_ei.shape), 1.0e-15)
Example #7
def build_random_gaussian_process(points_sampled,
                                  covariance,
                                  noise_variance=None,
                                  gaussian_process_type=GaussianProcess):
    r"""Utility to draw ``points_sampled.shape[0]`` points from a GaussianProcess prior, add those values to the GP, and return the GP.

    This is mainly useful for testing or when "random" data is needed that will produce reasonably well-behaved GPs.

    :param points_sampled: points at which to draw from the GP
    :type points_sampled: array of float64 with shape (num_sampled, dim)
    :param covariance: covariance function backing the GP
    :type covariance: interfaces.covariance_interface.CovarianceInterface subclass composable with gaussian_process_type
    :param noise_variance: the ``\sigma_n^2`` (noise variance) associated w/the new observations, ``points_sampled_value``
    :type noise_variance: array of float64 with shape (num_sampled)
    :param gaussian_process_type: gaussian process whose historical data is being set
    :type gaussian_process_type: interfaces.gaussian_process_interface.GaussianProcessInterface subclass
    :return: a gaussian process with the generated prior data
    :rtype: gaussian_process_type object

    """
    if noise_variance is None:
        noise_variance = numpy.zeros(points_sampled.shape[0])

    gaussian_process = gaussian_process_type(
        covariance, HistoricalData(points_sampled.shape[1]))
    for i, point in enumerate(points_sampled):
        # Draw function value from the GP
        function_value = gaussian_process.sample_point_from_gp(
            point, noise_variance=noise_variance[i])
        # Add function value back into the GP
        sample_point = [SamplePoint(point, function_value, noise_variance[i])]
        gaussian_process.add_sampled_points(sample_point)

    return gaussian_process
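A minimal sketch of driving this utility, assuming the MOE import paths below and that the covariance implementation matches the chosen gaussian_process_type (both the pure-Python versions here); the bounds and hyperparameters are arbitrary.

import numpy
# Import paths assumed from the MOE package layout.
from moe.optimal_learning.python.python_version.covariance import SquareExponential
from moe.optimal_learning.python.python_version.gaussian_process import GaussianProcess

points = numpy.random.uniform(-1.0, 1.0, size=(5, 2))        # 5 points in a 2-d space
covariance = SquareExponential(numpy.array([1.0, 0.5, 0.5]))  # [signal variance, length scales]
gp = build_random_gaussian_process(points, covariance,
                                   gaussian_process_type=GaussianProcess)
print(gp.num_sampled)  # 5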
Example #8
class Experiment(object):
    """A class for MOE optimizable experiments."""
    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [
            ClosedInterval(bound[0], bound[1]) for bound in domain_bounds
        ]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )

    def build_json_payload(self):
        """Construct a json serializeable and MOE REST recognizeable dictionary of the experiment."""
        return {
            'domain_info': self.domain.get_json_serializable_info(),
            'gp_historical_info': self.historical_data.json_payload(),
        }

    def __str__(self):
        """Return a pprint formated version of the experiment dict."""
        return pprint.pformat(self.build_json_payload)
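A short usage sketch for the class above; the bounds and the sampled point are made up, and the SamplePoint import path is assumed.

from moe.optimal_learning.python.data_containers import SamplePoint  # assumed import path

exp = Experiment([[0.0, 1.0], [0.0, 2.0]])
exp.historical_data.append_sample_points(
    [SamplePoint([0.2, 1.0], 3.4, 0.01)])  # (point, value, noise variance)
payload = exp.build_json_payload()  # keys: 'domain_info', 'gp_historical_info'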
Example #9
    def test_sample_point_from_gp(self):
        """Test that sampling points from the GP works."""
        point_one = SamplePoint([0.0, 1.0], -1.0, 0.0)
        point_two = SamplePoint([2.0, 2.5], 1.0, 0.1)
        covariance = SquareExponential([1.0, 1.0, 1.0])
        historical_data = HistoricalData(len(point_one.point),
                                         [point_one, point_two])

        gaussian_process = GaussianProcess(covariance, historical_data)
        out_values = numpy.zeros(3)
        for i in xrange(3):
            out_values[i] = gaussian_process.sample_point_from_gp(
                point_two.point, 0.001)

        gaussian_process._gaussian_process.reset_to_most_recent_seed()
        out_values_test = numpy.ones(3)
        for i in xrange(3):
            out_values_test[i] = gaussian_process.sample_point_from_gp(
                point_two.point, 0.001)

        # Exact match b/c we should've run over the exact same computations
        self.assert_vector_within_relative(out_values_test, out_values, 0.0)

        # Sampling from a historical point (that had 0 noise) should produce the same value associated w/that point
        value = gaussian_process.sample_point_from_gp(point_one.point, 0.0)
        self.assert_scalar_within_relative(value, point_one.value,
                                           numpy.finfo(numpy.float64).eps)
Example #10
class Experiment(object):

    """A class for MOE optimizable experiments."""

    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
                self.domain.dim,
                sample_points=points_sampled,
                )

    def build_json_payload(self):
        """Construct a json serializeable and MOE REST recognizeable dictionary of the experiment."""
        return {
                'domain_info': self.domain.get_json_serializable_info(),
                'gp_historical_info': self.historical_data.json_payload(),
                }

    def __str__(self):
        """Return a pprint formated version of the experiment dict."""
        return pprint.pformat(self.build_json_payload)
Example #11
def _make_gp_from_params(params):
    """Create and return a C++ backed gaussian_process from the request params as a dict.

    ``params`` has the following form::

        params = {
            'gp_historical_info': <instance of :class:`moe.views.schemas.base_schemas.GpHistoricalInfo`>,
            'domain_info': <instance of :class:`moe.views.schemas.base_schemas.DomainInfo`>,
            'covariance_info': <instance of :class:`moe.views.schemas.base_schemas.CovarianceInfo`>,
            }

    :param params: The request params dict
    :type params: dict

    """
    # Load up the info
    gp_historical_info = params.get("gp_historical_info")
    domain_info = params.get("domain_info")
    points_sampled = gp_historical_info.get('points_sampled')

    sample_point_list = []
    for point in points_sampled:
        sample_point_list.append(
            SamplePoint(
                point['point'],
                point['value'],
                point['value_var'],
            ))
    optimizer_info = params.get('optimizer_info', {})
    optimizer_type = optimizer_info.get('optimizer_type', None)

    if optimizer_type == L_BFGS_B_OPTIMIZER:
        covariance_of_process = _make_covariance_of_process_from_params(
            params, "python")
        gaussian_process = pythonGaussianProcess(
            covariance_of_process,
            HistoricalData(domain_info.get('dim'), sample_point_list),
        )
    else:
        covariance_of_process = _make_covariance_of_process_from_params(params)
        gaussian_process = GaussianProcess(
            covariance_of_process,
            HistoricalData(domain_info.get('dim'), sample_point_list),
        )

    return gaussian_process
Example #12
def gp_hyper_opt(points_sampled,
                 rest_host=DEFAULT_HOST,
                 rest_port=DEFAULT_PORT,
                 testapp=None,
                 **kwargs):
    """Hit the rest endpoint for optimizing the hyperparameters of a gaussian process, given points already sampled."""
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_HYPER_OPT_ROUTE_NAME]
    # This will fail if len(points_sampled) == 0; but then again this endpoint doesn't make sense with 0 historical data
    gp_dim = len(points_sampled[0][0])
    raw_payload = kwargs.copy()

    # Sanitize input points
    points_sampled_clean = [
        SamplePoint._make(point) for point in points_sampled
    ]
    historical_data = HistoricalData(
        gp_dim,
        sample_points=points_sampled_clean,
    )

    if 'domain_info' not in raw_payload:
        raw_payload['domain_info'] = {'dim': gp_dim}

    if 'gp_historical_info' not in raw_payload:
        raw_payload['gp_historical_info'] = historical_data.json_payload()

    if 'hyperparameter_domain_info' not in raw_payload:
        hyper_dim = gp_dim + 1  # default covariance has this many parameters
        raw_payload['hyperparameter_domain_info'] = {
            'dim': hyper_dim,
            'domain_bounds': [{
                'min': 0.1,
                'max': 2.0
            }] * hyper_dim,
        }

    json_payload = json.dumps(raw_payload)

    json_response = call_endpoint_with_payload(rest_host, rest_port, endpoint,
                                               json_payload, testapp)

    output = GpHyperOptResponse().deserialize(json_response)

    return output['covariance_info']
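As with gp_mean_var, a hypothetical call assuming a running MOE REST endpoint; the points below are made up, and the returned covariance_info dict is expected to carry the optimized hyperparameters.

points_sampled = [
    ([0.0, 0.0], 1.0, 0.01),
    ([1.0, 1.0], -0.5, 0.01),
    ([0.5, 0.2], 0.3, 0.01),
]
covariance_info = gp_hyper_opt(points_sampled)
print(covariance_info)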
Example #13
    def test_gp_construction_singular_covariance_matrix(self):
        """Test that the GaussianProcess ctor indicates a singular covariance matrix when points_sampled contains duplicates (0 noise)."""
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 1))
        domain, gaussian_process = self.gp_test_environments[index]
        point_one = SamplePoint([0.0] * domain.dim, 1.0, 0.0)
        # points two and three have duplicate coordinates and we have noise_variance = 0.0
        point_two = SamplePoint([1.0] * domain.dim, 1.0, 0.0)
        point_three = point_two

        historical_data = HistoricalData(len(point_one.point), [point_one, point_two, point_three])
        T.assert_raises(C_GP.SingularMatrixException, GaussianProcess, gaussian_process.get_covariance_copy(), historical_data)
Example #14
def load_sample_data(problem, num_per_var, exp_path, result_path):
    var_dim = int(problem.obj_func_min.getDim()) - 1
    num_initial_pts_per_s = int(num_per_var * var_dim)
    with open(result_path + '_initial_samples.pickle', 'rb') as file:
        list_init_pts_value_noise = pickle.load(file)
    new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim())
    count = -1
    repQL = problem.obj_func_min.repQL
    s_min = problem.obj_func_min.getSearchDomain()[0, 0]
    s_max = problem.obj_func_min.getSearchDomain()[0, 1]
    for s in np.linspace(s_min, s_max, num=problem.obj_func_min.getNums()):
        count += 1
        pts_value_noise = list_init_pts_value_noise[count]
        points = pts_value_noise[:, 0:-2]
        vals_array = pts_value_noise[:, -2]
        noise_array = pts_value_noise[:, -1]
        new_historical_data.append_historical_data(points, vals_array,
                                                   noise_array)

    return new_historical_data
Example #15
def gp_hyper_opt(
        points_sampled,
        rest_host=DEFAULT_HOST,
        rest_port=DEFAULT_PORT,
        testapp=None,
        **kwargs
        ):
    """Hit the rest endpoint for optimizing the hyperparameters of a gaussian process, given points already sampled."""
    endpoint = ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT[GP_HYPER_OPT_ROUTE_NAME]
    # This will fail if len(points_sampled) == 0; but then again this endpoint doesn't make sense with 0 historical data
    gp_dim = len(points_sampled[0][0])
    raw_payload = kwargs.copy()

    # Sanitize input points
    points_sampled_clean = [SamplePoint._make(point) for point in points_sampled]
    historical_data = HistoricalData(
            gp_dim,
            sample_points=points_sampled_clean,
            )

    if 'domain_info' not in raw_payload:
        raw_payload['domain_info'] = {'dim': gp_dim}

    if 'gp_historical_info' not in raw_payload:
        raw_payload['gp_historical_info'] = historical_data.json_payload()

    if 'hyperparameter_domain_info' not in raw_payload:
        hyper_dim = gp_dim + 1  # default covariance has this many parameters
        raw_payload['hyperparameter_domain_info'] = {
            'dim': hyper_dim,
            'domain_bounds': [{'min': 0.1, 'max': 2.0}] * hyper_dim,
        }

    json_payload = json.dumps(raw_payload)

    json_response = call_endpoint_with_payload(rest_host, rest_port, endpoint, json_payload, testapp)

    output = GpHyperOptResponse().deserialize(json_response)

    return output['covariance_info']
Example #16
    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [
            ClosedInterval(bound[0], bound[1]) for bound in domain_bounds
        ]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )
Example #17
def createHistoricalDataForMisoEI(dim_obj_func_min, listPrevData, directory,
                                  bias_filename):
    """ Note: since misoEI uses notion of fidelity variance, I set it to noise_var + bias^2, where bias is estimated
    from biasData
    :param dim_obj_func_min:
    :param listPrevData:
    :return:
    """
    with open("{0}/{1}.pickle".format(directory, bias_filename),
              "rb") as input_file:
        bias_data = pickle.load(input_file)
    bias_sq_list = numpy.power(
        numpy.concatenate(([0.], [
            numpy.mean(bias_data['vals'][i])
            for i in range(len(listPrevData) - 1)
        ])), 2.0)
    data_list = []
    for i, dataset in enumerate(listPrevData):
        data = HistoricalData(dim_obj_func_min)
        data.append_historical_data(dataset[0], dataset[1],
                                    numpy.array(dataset[2]) + bias_sq_list[i])
        data_list.append(data)
    return data_list, bias_sq_list
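A hedged sketch of calling the function above with made-up data: the bias pickle holds one array of observed biases per secondary IS, and listPrevData carries (points, vals, noise) tuples with the unbiased IS first. The path, filename, and numbers are hypothetical.

import pickle
import numpy

# Hypothetical bias pickle: one array of observed biases per secondary IS.
with open('/tmp/ato_bias.pickle', 'wb') as f:
    pickle.dump({'vals': [numpy.array([0.5, -0.3, 0.4])]}, f)

prev_IS0 = (numpy.array([[0.1, 0.2]]), [1.0], [0.01])  # unbiased IS first
prev_IS1 = (numpy.array([[0.3, 0.4]]), [0.8], [0.02])

data_list, bias_sq_list = createHistoricalDataForMisoEI(
    dim_obj_func_min=2,
    listPrevData=[prev_IS0, prev_IS1],
    directory='/tmp',
    bias_filename='ato_bias')
print(bias_sq_list)  # [0.0, mean(bias)**2]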
Example #18
    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
                self.domain.dim,
                sample_points=points_sampled,
                )
Example #19
def get_random_gp_data(space_dim, num_is, num_data_each_is, kernel_name):
    """ Generate random gp data
    :param space_dim:
    :param num_is:
    :param num_data_each_is:
    :param kernel_name: currently it's either 'mix_exp' or 'prod_ker'
    :return:
    """
    sample_var = 0.01
    if kernel_name == "mix_exp":
        hyper_params = numpy.random.uniform(size=(num_is + 1) *
                                            (space_dim + 1))
        cov = MixedSquareExponential(hyper_params, space_dim + 1, num_is)
    elif kernel_name == "prod_ker":
        hyper_params = numpy.random.uniform(size=(num_is + 1) *
                                            (num_is + 2) / 2 + space_dim + 1)
        cov = ProductKernel(hyper_params, space_dim + 1, num_is + 1)
    else:
        raise NotImplementedError("invalid kernel")
    python_search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in numpy.repeat([[-10., 10.]], space_dim + 1, axis=0)
    ])
    data = HistoricalData(space_dim + 1)
    init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
    init_pts[:, 0] = numpy.zeros(2)
    data.append_historical_data(init_pts, numpy.zeros(2),
                                numpy.ones(2) * sample_var)
    gp = GaussianProcess(cov, data)
    points = python_search_domain.generate_uniform_random_points_in_domain(
        num_data_each_is)
    for pt in points:
        for i in range(num_is):
            pt[0] = i
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([
                [pt, val, sample_var],
            ])
            gp = GaussianProcess(cov, data)
    return hyper_params, data
Example #20
    def generate_data(self, num_data):
        python_search_domain = pythonTensorProductDomain([
            ClosedInterval(bound[0], bound[1])
            for bound in self._info_dict['search_domain']
        ])
        data = HistoricalData(self._info_dict['dim'])
        init_pts = python_search_domain.generate_uniform_random_points_in_domain(2)
        init_pts[:, 0] = numpy.zeros(2)
        data.append_historical_data(init_pts, numpy.zeros(2),
                                    numpy.ones(2) * self._sample_var_1)
        gp = GaussianProcess(self._cov, data)
        points = python_search_domain.generate_uniform_random_points_in_domain(num_data)
        for pt in points:
            pt[0] = numpy.ceil(numpy.random.uniform(high=2.0, size=1))
            sample_var = self._sample_var_1 if pt[0] == 1 else self._sample_var_2
            val = gp.sample_point_from_gp(pt, sample_var)
            data.append_sample_points([
                [pt, val, sample_var],
            ])
            gp = GaussianProcess(self._cov, data)
        return data
Example #21
observations = [0] + [i + 1 for i in derivatives]
init_pts_value = np.array(
    [objective_func.evaluate(pt) for pt in init_pts]
)  # [:, observations]
true_value_init = np.array(
    [objective_func.evaluate_true(pt) for pt in init_pts]
)  # [:, observations]

# Collecting Data
s_suggest = np.array(init_pts)
f_s_suggest = np.array(init_pts_value).reshape(initial_n, 1)
s_recommend = np.array(init_pts)
f_s_recommend = np.array(true_value_init).reshape(initial_n, 1)
elapsed = np.zeros([1, num_iteration + initial_n])

init_data = HistoricalData(dim=objective_func._dim, num_derivatives=len(derivatives))
init_data.append_sample_points(
    [
        SamplePoint(
            pt,
            [init_pts_value[num, i] for i in observations],
            objective_func._sample_var,
        )
        for num, pt in enumerate(init_pts)
    ]
)

# initialize the model
prior = DefaultPrior(1 + dim + len(observations), len(observations))

# noisy = False means the underlying function being optimized is noise-free
Example #22
    def test(self):

        rb = RosenbrockVanilla()
        func_name = rb.getFuncName()

        pathToPickles = 'picklesTest'

        ### Test load_data_from_a_min_problem()
        name_testfile = 'load_and_store_Test'
        samples = numpy.array([[[1, 1], [1, 2]]])
        #print samples
        values = [[1.0, 2.0]]
        data = {"points": samples, "vals": values}

        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)
        loaded_pts, loaded_vals = load_data_from_a_min_problem(
            pathToPickles, name_testfile)
        for index in range(len(samples)):
            self.assertTrue((samples[index] == loaded_pts[index]).all())
        for index in range(len(values)):
            self.assertTrue((values[index] == loaded_vals[index]))

        # test overwriting
        samples = numpy.array([[[1, 4], [1, 2]]])
        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)
        loaded_pts, loaded_vals = load_data_from_a_min_problem(
            pathToPickles, name_testfile)
        for index in range(len(samples)):
            self.assertTrue((samples[index] == loaded_pts[index]).all())
        for index in range(len(values)):
            self.assertTrue((values[index] == loaded_vals[index]))

        ### Test obtainHistoricalDataForEGO()

        #TODO come up with tests for these  functions

        list_IS_to_query = [0]
        num_init_pts_each_IS = 10

        name_testfile = rb.getFuncName() + '_' + 'IS_' + '_'.join(
            str(element) for element in list_IS_to_query) + '_' + str(
                num_init_pts_each_IS) + "_points_each"

        with open("{0}/{1}.pickle".format(pathToPickles, name_testfile),
                  "wb") as output_file:
            pickle.dump(data, output_file)

        # testHistoricalData = obtainHistoricalDataForEGO(True, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
        # print testHistoricalData
        #
        # testHistoricalDataRandom = obtainHistoricalDataForEGO(False, rb, pathToPickles, list_IS_to_query, num_init_pts_each_IS)
        # print testHistoricalDataRandom

        ### Test createHistoricalDataForKG()
        listPrevData = []

        samples = [[1, 1], [1, 2]]
        values = [1.0, 2.0]
        list_noise_variance_at_sample = [0.1, 0.3]
        listPrevData.append((samples, values, list_noise_variance_at_sample))

        hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
        #print hist_kg
        IS_samples = [[0, 1, 1], [0, 1, 2]]
        for index in range(len(hist_kg.points_sampled)):
            self.assertTrue(
                (IS_samples[index] == hist_kg.points_sampled[index]).all())
        for index in range(len(hist_kg.points_sampled_value)):
            self.assertTrue(
                (values[index] == hist_kg.points_sampled_value[index]).all)

        samples = [[0, 0], [4, 3]]
        for index in range(len(hist_kg.points_sampled)):
            self.assertTrue(
                (IS_samples[index] == hist_kg.points_sampled[index]).all())

        listPrevData = [(samples, values, list_noise_variance_at_sample)]
        bestpt, bestval, best_truth = findBestSampledValue(rb, listPrevData, 0)
        # print findBestSampledValue(rb, listPrevData, 0)
        self.assertAlmostEqual(bestval, 1.0, delta=.0001)
        self.assertAlmostEqual(bestval, 1.0, delta=0.0001)
        # self.assertAlmostEqual(bestval, 1.0, delta=0.0001)
        self.assertAlmostEqual(best_truth, numpy.float64(-9.0), delta=1.0)
        self.assertTrue((bestpt == [0.0, 0.0]))

        list_sampled_IS = [0, 0]
        gathered_data_from_all_replications = []
        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })

        for indexList in range(len(gathered_data_from_all_replications)):
            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['vals'])):
                self.assertAlmostEqual(
                    values[indexElem],
                    gathered_data_from_all_replications[indexList]['vals']
                    [indexElem],
                    delta=0.0001)

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['points'])):
                self.assertTrue(samples[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['points'][indexElem])

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['sampledIS'])):
                self.assertTrue(list_sampled_IS[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['sampledIS'][indexElem])

        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })
        for indexList in range(len(gathered_data_from_all_replications)):
            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['vals'])):
                self.assertAlmostEqual(
                    values[indexElem],
                    gathered_data_from_all_replications[indexList]['vals']
                    [indexElem],
                    delta=0.0001)

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['points'])):
                self.assertTrue(samples[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['points'][indexElem])

            for indexElem in range(
                    len(gathered_data_from_all_replications[indexList]
                        ['sampledIS'])):
                self.assertTrue(list_sampled_IS[indexElem] ==
                                gathered_data_from_all_replications[indexList]
                                ['sampledIS'][indexElem])

        samples = [[-1., 0], [0.1, -2.0]]
        values = [0.2, 1.5]
        list_sampled_IS = [3, 3]
        gathered_data_from_all_replications.append({
            "points": samples,
            "vals": values,
            "noise_variance": list_noise_variance_at_sample,
            "sampledIS": list_sampled_IS
        })
        for indexElem in range(
                len(gathered_data_from_all_replications[2]['vals'])):
            self.assertAlmostEqual(
                values[indexElem],
                gathered_data_from_all_replications[2]['vals'][indexElem],
                delta=0.0001)

        for indexElem in range(
                len(gathered_data_from_all_replications[2]['points'])):
            self.assertTrue(
                samples[indexElem] == gathered_data_from_all_replications[2]
                ['points'][indexElem])

        for indexElem in range(
                len(gathered_data_from_all_replications[2]['sampledIS'])):
            self.assertTrue(
                list_sampled_IS[indexElem] ==
                gathered_data_from_all_replications[2]['sampledIS'][indexElem])

        listPrevData.append(
            (gathered_data_from_all_replications[2]['points'],
             gathered_data_from_all_replications[2]['vals'],
             gathered_data_from_all_replications[2]['noise_variance']))

        hist_kg = createHistoricalDataForKG(rb._dim, listPrevData)
        #print hist_kg
        self.assertTrue((hist_kg.points_sampled[0] == [0, 0, 0]).all())
        self.assertTrue((hist_kg.points_sampled[1] == [0, 4, 3]).all())
        self.assertTrue((hist_kg.points_sampled[2] == [1, -1.0, 0]).all())
        self.assertTrue((hist_kg.points_sampled[3] == [1, .1, -2]).all())

        self.assertAlmostEqual(values[0],
                               -1.0 * hist_kg.points_sampled_value[2],
                               delta=0.0001)
        self.assertAlmostEqual(values[1],
                               -1.0 * hist_kg.points_sampled_value[3],
                               delta=0.0001)

        self.assertAlmostEqual(list_noise_variance_at_sample[0],
                               hist_kg.points_sampled_noise_variance[2],
                               delta=0.0001)
        self.assertAlmostEqual(list_noise_variance_at_sample[1],
                               hist_kg.points_sampled_noise_variance[3],
                               delta=0.0001)

        ### Test for findBestSampledValueFromHistoricalData()
        atoext = AssembleToOrderExtended(mult=-1.0)
        hd = HistoricalData(atoext.getDim())
        pts = numpy.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                           [1.0, 0.2, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4]])
        self.assertTrue(len(pts) == 2)
        self.assertTrue(len(pts[0]) == atoext.getDim())
        self.assertTrue(len(pts[1]) == atoext.getDim())
        vals = [-1.0, 0.2]
        noises = [0.1, 0.2]
        hd.append_historical_data(pts, vals, noises)
        # print hd.to_list_of_sample_points()

        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        # print bestpt
        # print best_val
        # print best_truth
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -1.0)
        self.assertAlmostEqual(best_truth,
                               atoext.evaluate(2, bestpt),
                               delta=10.0)

        pts = numpy.array([[1.3, 1.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                           [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
        vals = [-11.0, 10.2]
        noises = [10.1, 1000.2]
        hd.append_historical_data(pts, vals, noises)
        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -11.0)

        pts2 = numpy.array([[10.3, 10.4, 10.0, 11.0, 19.0, 1.0, 1.0, 1.0],
                            [13.0, 10.2, 10.3, 10.4, 10.5, 0.2, 10.3, 0.4]])
        vals = [11.0, 10.2]
        hd.append_historical_data(pts, vals, noises)
        bestpt, best_val, best_truth = findBestSampledValueFromHistoricalData(
            atoext, hd)
        self.assertTrue((pts[0] == bestpt).all())
        self.assertTrue(best_val == -11.0)
Example #23
def main():

    args = docopt(__doc__)

    # Parse arguments
    mesh = args['<mesh>']
    weights = np.load(args['<weightfile>'])
    init_centroid = np.genfromtxt(args['<init_centroid>'])
    coil = args['<coil>']
    output_file = args['<output_file>']
    cpus = int(args['--cpus'] or 8)
    tmpdir = args['--tmp-dir'] or os.getenv('TMPDIR') or "/tmp/"
    num_iters = int(args['--n-iters'] or 50)
    min_samps = int(args['--min-var-samps'] or 10)
    tol = float(args['--convergence'] or 0.001)
    history = args['--history']
    skip_convergence = args['--skip-convergence']
    options = args['--options']

    if options:
        with open(options, 'r') as f:
            opts = json.load(f)
        logging.info("Using custom options file {}".format(options))
        logging.info("{}".format('\''.join(
            [f"{k}:{v}" for k, v in opts.items()])))
    else:
        opts = {}

    logging.info('Using {} cpus'.format(cpus))

    f = FieldFunc(mesh_file=mesh,
                  initial_centroid=init_centroid,
                  tet_weights=weights,
                  coil=coil,
                  field_dir=tmpdir,
                  cpus=cpus,
                  **opts)

    # Make search domain
    search_domain = TensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    c_search_domain = cTensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    # Generate historical points
    prior = DefaultPrior(n_dims=3 + 2, num_noise=1)
    prior.tophat = TophatPrior(-2, 5)
    prior.ln_prior = NormalPrior(12.5, 1.6)
    hist_pts = cpus
    i = 0
    init_pts = search_domain.generate_uniform_random_points_in_domain(hist_pts)
    observations = -f.evaluate(init_pts)
    hist_data = HistoricalData(dim=3, num_derivatives=0)
    hist_data.append_sample_points(
        [SamplePoint(inp, o, 0.0) for o, inp in zip(observations, init_pts)])

    # Train GP model
    gp_ll = GaussianProcessLogLikelihoodMCMC(historical_data=hist_data,
                                             derivatives=[],
                                             prior=prior,
                                             chain_length=1000,
                                             burnin_steps=2000,
                                             n_hypers=2**4,
                                             noisy=False)
    gp_ll.train()

    # Initialize grad desc params
    sgd_params = cGDParams(num_multistarts=200,
                           max_num_steps=50,
                           max_num_restarts=5,
                           num_steps_averaged=4,
                           gamma=0.7,
                           pre_mult=1.0,
                           max_relative_change=0.5,
                           tolerance=1.0e-10)

    num_samples = int(cpus * 1.3)
    best_point_history = []

    # Sum of errors buffer
    var_buffer = deque(maxlen=min_samps)
    for i in np.arange(0, num_iters):

        # Optimize qEI and pick samples
        points_to_sample, ei = gen_sample_from_qei(gp_ll.models[0],
                                                   c_search_domain,
                                                   sgd_params=sgd_params,
                                                   num_samples=num_samples,
                                                   num_mc=2**10)

        # Collect observations
        sampled_points = -f.evaluate(points_to_sample)
        evidence = [
            SamplePoint(c, v, 0.0)
            for c, v in zip(points_to_sample, sampled_points)
        ]

        # Update model
        gp_ll.add_sampled_points(evidence)
        gp_ll.train()

        # Pull model and pull values
        gp = gp_ll.models[0]
        min_point = np.argmin(gp._points_sampled_value)
        min_val = np.min(gp._points_sampled_value)
        best_coord = gp.get_historical_data_copy().points_sampled[min_point]

        logging.info('Iteration {} of {}'.format(i, num_iters))
        logging.info('Recommended Points:')
        logging.info(points_to_sample)
        logging.info('Expected Improvement: {}'.format(ei))
        logging.info('Current Best:')
        logging.info(f'f(x*)= {min_val}')
        logging.info(f'Coord: {best_coord}')
        best_point_history.append(str(min_val))

        if history:
            with open(history, 'w') as buf:
                buf.write('\n'.join(best_point_history))

        # Convergence check
        if (len(var_buffer) == var_buffer.maxlen) and not skip_convergence:
            deviation = sum([abs(x - min_val) for x in var_buffer])
            if deviation < tol:
                logging.info('Convergence reached!')
                logging.info('Deviation: {}'.format(deviation))
                logging.info('History length: {}'.format(var_buffer.maxlen))
                logging.info('Tolerance: {}'.format(tol))
                break

        var_buffer.append(min_val)

    # Save position and orientation matrix
    np.savetxt(output_file, best_coord)
Example #24
noise_and_cost_func = obj_func_min.noise_and_cost_func

# Load initial data from pickle
init_pts = load_init_points_for_all_IS("pickles", init_data_pickle_filename,
                                       obj_func_min._numIS)
init_vals = load_vals("pickles", init_data_pickle_filename,
                      obj_func_min._numIS)
#init_pts, init_vals = sample_initial_points.load_data_from_a_min_problem("pickles", init_data_pickle_filename)

# setup benchmark result container
multi_kg_result = BenchmarkResult(num_iterations, obj_func_max._dim,
                                  benchmark_result_table_name)
kg_hyper_param = pandas.read_sql_table(
    'multifidelity_kg_hyperparam_' + func_name,
    sql_util.sql_engine).mean(axis=0).values
kg_data = HistoricalData(obj_func_max._dim + 1)
best_sampled_val = numpy.inf
for i in range(obj_func_max._num_IS):
    IS_pts = numpy.hstack(((i + 1) * numpy.ones(len(init_pts[i])).reshape(
        (-1, 1)), init_pts[i]))

    # multiply all values by -1 since we assume that the training data stems from the minimization version
    # but misoKG uses the maximization version
    vals = -1.0 * numpy.array(init_vals[i])

    # obtain what used to be sample_vars
    noise_vars = numpy.array(
        [noise_and_cost_func(i + 1, pt)[0] for pt in init_pts[i]])
    kg_data.append_historical_data(IS_pts, vals, noise_vars)

    # find the best initial value
Example #25
        # separate hypers for GP and for observational noise
        print "misoKG: repl {0}, itr {1}, best hyper: {2}".format(
            problem.replication_no, kg_iteration, best_hyper)
        ### Format: IS 0: signal variance and length scales, IS 1: signal variance and length scales, etc.
        ###  Then observational noise for IS 0, IS 1 etc.

        hyperparameters_noise = numpy.power(best_hyper[-num_IS:], 2.0)
        hypers_GP = best_hyper[:-num_IS]

        # update noise in historical data
        updated_points_sampled_noise_variance = create_array_points_sampled_noise_variance(
            current_hist_data.points_sampled, hyperparameters_noise)

        # create new Historical data object with updated values
        new_historical_data = HistoricalData(
            dim=problem.obj_func_min.getDim() +
            1)  # increased by one for index of IS
        new_historical_data.append_historical_data(
            current_hist_data.points_sampled,
            current_hist_data.points_sampled_value,
            updated_points_sampled_noise_variance)

        # Use new hyperparameters -- this requires instantiating a new GP object
        kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hypers_GP)
        kg_gp_cpp = GaussianProcessNew(kg_cov_cpp,
                                       new_historical_data,
                                       num_IS_in=problem.num_is_in)
        # kg_cov_cpp is not used afterwards

    ### Find IS and point that maximize KG/cost
    discretization_points = problem.obj_func_min.get_moe_domain(
Example #26
def obtainHistoricalDataForEGO(load_historical_data_from_pickle,
                               obj_func_min,
                               directoryToPickles,
                               list_IS_to_query,
                               num_init_pts_each_IS,
                               init_data_pickle_filename=''):
    '''
    Create Historical Data object for EGO that contains initial data.
    If truthIS is among the IS, then load only the data from that one
    Args:
        load_historical_data_from_pickle: if True, load from a pickle; otherwise sample points uniformly at random from the search domain
        obj_func_min: the problem
        directoryToPickles: path to the directory that contains the pickle files
        list_IS_to_query: list of the IS that should be queried, e.g. [0, 1, 2]
        num_init_pts_each_IS: how many points for each IS; used either to find the right pickle or to determine the number of points to sample
        init_data_pickle_filename: optional parameter that gives the filename of the pickle to load

    Returns: HistoricalData object

    '''
    historical_data = HistoricalData(obj_func_min._dim)
    if (load_historical_data_from_pickle):
        # To load the pickled data, do:
        if (init_data_pickle_filename == ''):
            init_data_pickle_filename = obj_func_min.getFuncName() + '_' + 'IS_' \
                                        + '_'.join(str(element) for element in list_IS_to_query) + '_' \
                                        + str(num_init_pts_each_IS) + "_points_each"
        init_pts_array, init_vals_array = load_data_from_a_min_problem(
            directoryToPickles, init_data_pickle_filename)

        # if truthIS is among the sampled, then load only that one:
        if obj_func_min.getTruthIS() in list_IS_to_query:
            indexArray = list_IS_to_query.index(obj_func_min.getTruthIS())
            sample_vars = [
                obj_func_min.noise_and_cost_func(obj_func_min.getTruthIS(),
                                                 pt)[0]
                for pt in init_pts_array[indexArray]
            ]
            historical_data.append_historical_data(init_pts_array[indexArray],
                                                   init_vals_array[indexArray],
                                                   sample_vars)
        else:
            # load data for all IS
            indexArray = 0
            for index_IS in list_IS_to_query:
                sample_vars = [
                    obj_func_min.noise_and_cost_func(index_IS, pt)[0]
                    for pt in init_pts_array[indexArray]
                ]
                historical_data.append_historical_data(
                    init_pts_array[indexArray], init_vals_array[indexArray],
                    sample_vars)
                indexArray += 1
    else:
        # generate initial data from querying random points for each IS
        for index_IS in list_IS_to_query:
            if (obj_func_min.getTruthIS() in list_IS_to_query) and (
                    index_IS != obj_func_min.getTruthIS()):
                continue  # the truthIS is observed but this is another IS: skip!

            search_domain = pythonTensorProductDomain([
                ClosedInterval(bound[0], bound[1])
                for bound in obj_func_min._search_domain
            ])
            pts = search_domain.generate_uniform_random_points_in_domain(
                num_init_pts_each_IS)
            vals = [obj_func_min.evaluate(index_IS, pt) for pt in pts]
            sample_vars = [
                obj_func_min.noise_and_cost_func(index_IS, pt)[0] for pt in pts
            ]
            historical_data.append_historical_data(pts, vals, sample_vars)

    return historical_data
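A hypothetical call mirroring the commented-out usage in the test example above, under the random-design branch (no pickle); rb stands in for a problem object such as the RosenbrockVanilla instance constructed there.

rb = RosenbrockVanilla()  # hypothetical problem object, as in the test above
hist = obtainHistoricalDataForEGO(load_historical_data_from_pickle=False,
                                  obj_func_min=rb,
                                  directoryToPickles='picklesTest',
                                  list_IS_to_query=[0],
                                  num_init_pts_each_IS=10)
print(hist.num_sampled)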
Example #27
def construct_hist_data_from_s3(bucket,
                                dim,
                                IS_key_dict,
                                combine_IS,
                                sign,
                                take_diff=False,
                                primary_IS=None):
    """
    :param bucket: amazon s3 bucket object
    :param dim: space dimension of the problem
    :type dim: int
    :param IS_key_dict: {IS: key} hashtable which provides key of the data for the corresponding IS
    :type IS_key_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the product space IS x space, or a dict of
    HistoricalData objects, one per IS
    :type combine_IS: bool
    :param sign: sign = 1.0 means a minimization problem, otherwise a maximization problem
    :type sign: float
    :param take_diff: whether to take the difference between IS_i and primary_IS; this is used by one approach for
    estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_IS: if take_diff = True, this specifies the primary IS
    :type primary_IS: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    if take_diff:
        data = get_data_from_s3(bucket, IS_key_dict[primary_IS])
        points_dict[primary_IS] = np.array(data['points'])
        vals_dict[primary_IS] = sign * np.array(data['vals'])
        noise_dict[primary_IS] = np.array(data['noise'])
    for IS in IS_key_dict:
        if take_diff and IS != primary_IS:
            data = get_data_from_s3(bucket, IS_key_dict[IS])
            assert np.array_equal(data['points'], points_dict[primary_IS]
                                  ), "inconsistent points, cannot take diff!"
            points_dict[IS] = np.array(data['points'])
            vals_dict[IS] = sign * np.array(
                data['vals']) - vals_dict[primary_IS]
            noise_dict[IS] = np.array(data['noise']) + noise_dict[primary_IS]
        elif not take_diff:
            data = get_data_from_s3(bucket, IS_key_dict[IS])
            points_dict[IS] = np.array(data['points'])
            vals_dict[IS] = sign * np.array(data['vals'])
            noise_dict[IS] = np.array(data['noise'])
    if combine_IS:
        to_return = HistoricalData(dim=dim + 1)
        for IS in points_dict:
            num_data = len(vals_dict[IS])
            to_return.append_historical_data(
                np.hstack((IS * np.ones(num_data).reshape(
                    (-1, 1)), points_dict[IS])), vals_dict[IS], noise_dict[IS])
    else:
        to_return = {}
        for IS in points_dict:
            to_return[IS] = HistoricalData(dim=dim)
            to_return[IS].append_historical_data(points_dict[IS],
                                                 vals_dict[IS], noise_dict[IS])
    return to_return
Example #28
def noise_and_cost_func(IS, x):
    return (0.001, 1000) if IS == 1 else (0.01, 1)


obj_func_max = Rosenbrock(num_IS=2,
                          noise_and_cost_func=noise_and_cost_func,
                          mult=-1.0)
num_discretization = 5000
num_init_pts_all_IS = [5, 5]
num_multistart = 50
hyper_param = pandas.read_sql_table('multifidelity_kg_hyperparam_' + func_name,
                                    sql_util.sql_engine).mean(axis=0).values
search_domain = pythonTensorProductDomain([
    ClosedInterval(bound[0], bound[1]) for bound in obj_func_max._search_domain
])
### Gen initial points
data = HistoricalData(obj_func_max._dim + 1)
for i in range(obj_func_max._num_IS):
    pts = search_domain.generate_uniform_random_points_in_domain(
        num_init_pts_all_IS[i])
    vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts]
    IS_pts = numpy.hstack(
        ((i + 1) * numpy.ones(num_init_pts_all_IS[i]).reshape((-1, 1)), pts))
    sample_vars = [
        obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts
    ]
    data.append_historical_data(IS_pts, vals, sample_vars)
cov_func = MixedSquareExponential(hyperparameters=hyper_param,
                                  total_dim=obj_func_max._dim + 1,
                                  num_is=obj_func_max._num_IS)
gp = GaussianProcess(cov_func, data)
Example #29
func_name = 'assembleToOrder'
obj_func_max = AssembleToOrder(numIS=4)
num_pts_to_gen = 100  # numpy.repeat( 250, obj_func_max.getNumIS())

hyper_bounds = [
    (0.01, 100)
    for i in range((obj_func_max.getDim() + 1) * (obj_func_max.getNumIS() + 1))
]
num_hyper_multistart = 5
search_domain = pythonTensorProductDomain([
    ClosedInterval(bound[0], bound[1])
    for bound in obj_func_max.getSearchDomain()
])

### Gen points for hyperparam estimation
data = HistoricalData(obj_func_max.getDim() +
                      1)  # should go into the objective func obj
for i in range(obj_func_max.getNumIS()):
    pts = search_domain.generate_uniform_random_points_in_domain(
        num_pts_to_gen)
    vals = [obj_func_max.evaluate(i + 1, pt) for pt in pts]
    IS_pts = numpy.hstack(((i + 1) * numpy.ones(num_pts_to_gen).reshape(
        (-1, 1)), pts))
    sample_vars = [
        obj_func_max.noise_and_cost_func(i + 1, pt)[0] for pt in pts
    ]
    data.append_historical_data(IS_pts, vals, sample_vars)

# hyperparam opt
print "start hyperparam optimization..."
hyperparam_search_domain = pythonTensorProductDomain(
    [ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds])
Example #30
def construct_hist_data_from_pickle(dim,
                                    directory,
                                    IS_filename_dict,
                                    combine_IS,
                                    sign,
                                    take_diff=False,
                                    primary_key=None):
    """
    :param dim: space dimension of the problem
    :type dim: int
    :param directory: dir of the pickle files
    :type directory: str
    :param IS_filename_dict: {IS: filename} hashtable which provides name of the pickle file for the corresponding IS
    :type IS_filename_dict: dict
    :param combine_IS: whether to construct a single HistoricalData on the product space IS x space, or a dict of
    HistoricalData objects, one per IS
    :type combine_IS: bool
    :param sign: sign = 1.0 means a minimization problem, otherwise a maximization problem
    :type sign: float
    :param take_diff: whether to take the difference between IS_i and primary_key; this is used by one approach for
    estimating mKG hyperparameters
    :type take_diff: bool
    :param primary_key: if take_diff = True, this specifies the primary IS
    :type primary_key: int
    :return: if combine_IS = True, return a HistoricalData object, otherwise return a dict of {IS: HistoricalData}
    :rtype: HistoricalData or dict
    """
    points_dict = {}
    vals_dict = {}
    noise_dict = {}
    if take_diff:
        with open(
                "{0}/{1}.pickle".format(directory,
                                        IS_filename_dict[primary_key]),
                "rb") as f:
            data = pickle.load(f)
            points_dict[primary_key] = np.array(data['points'])
            vals_dict[primary_key] = sign * np.array(data['vals'])
            noise_dict[primary_key] = np.array(data['noise'])
    for key in IS_filename_dict:
        if take_diff and key != primary_key:
            with open(
                    "{0}/{1}.pickle".format(directory, IS_filename_dict[key]),
                    "rb") as f:
                data = pickle.load(f)
                assert np.array_equal(
                    data['points'], points_dict[primary_key]
                ), "inconsistent points, cannot take diff!"
                points_dict[key] = np.array(data['points'])
                vals_dict[key] = sign * np.array(
                    data['vals']) - vals_dict[primary_key]
                noise_dict[key] = np.array(
                    data['noise']) + noise_dict[primary_key]
        elif not take_diff:
            with open(
                    "{0}/{1}.pickle".format(directory, IS_filename_dict[key]),
                    "rb") as f:
                data = pickle.load(f)
                points_dict[key] = np.array(data['points'])
                vals_dict[key] = sign * np.array(data['vals'])
                noise_dict[key] = np.array(data['noise'])
    if combine_IS:
        to_return = HistoricalData(dim=dim + 1)
        for key in points_dict:
            num_data = len(vals_dict[key])
            to_return.append_historical_data(
                np.hstack((key * np.ones(num_data).reshape(
                    (-1, 1)), points_dict[key])), vals_dict[key],
                noise_dict[key])
    else:
        to_return = {}
        for key in points_dict:
            to_return[key] = HistoricalData(dim=dim)
            to_return[key].append_historical_data(points_dict[key],
                                                  vals_dict[key],
                                                  noise_dict[key])
    return to_return
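To illustrate the pickle layout this function reads (a dict with 'points', 'vals' and 'noise' per IS), here is a sketch that writes one such file and loads it back; the path, filename, and data are made up.

import pickle
import numpy as np

# Hypothetical pickle for IS 0; the keys match what the loader above expects.
with open('/tmp/is0_data.pickle', 'wb') as f:
    pickle.dump({'points': np.array([[0.1, 0.2], [0.3, 0.4]]),
                 'vals': [1.0, 2.0],
                 'noise': [0.01, 0.01]}, f)

hist = construct_hist_data_from_pickle(dim=2, directory='/tmp',
                                       IS_filename_dict={0: 'is0_data'},
                                       combine_IS=True, sign=-1.0)
print(hist.points_sampled)  # the first column holds the IS index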
Example #31
func_name = 'assembleToOrder'
obj_func_min = AssembleToOrder(numIS=4, mult=-1.0)

hyper_bounds = [(0.01, 100) for i in range(obj_func_min.getDim() + 1)]
num_hyper_multistart = 3
num_pts_to_gen = 250
search_domain = pythonTensorProductDomain([
    ClosedInterval(bound[0], bound[1])
    for bound in obj_func_min.getSearchDomain()
])
cov = SquareExponential(numpy.ones(obj_func_min.getDim() + 1))

hyper_param = numpy.zeros((obj_func_min.getNumIS(), obj_func_min.getDim() + 1))
### Gen points for hyperparam estimation
for i in range(obj_func_min.getNumIS()):
    data = HistoricalData(obj_func_min.getDim())
    pts = search_domain.generate_uniform_random_points_in_domain(
        num_pts_to_gen)
    vals = [obj_func_min.evaluate(i + 1, pt) for pt in pts]
    sample_vars = [
        obj_func_min.noise_and_cost_func(i + 1, pt)[0] for pt in pts
    ]
    data.append_historical_data(pts, vals, sample_vars)
    # hyperparam opt
    hyperparam_search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds])
    multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(
        num_hyper_multistart)
    best_f = numpy.inf
    for k in range(num_hyper_multistart):
        hyper, f, output = hyper_opt(cov,
Example #32
conn = boto.connect_s3()
bucket = conn.get_bucket(s3_bucket_name, validate=True)

__author__ = 'jialeiwang'

# construct problem instance given CMD args
# format: run_pes.py ${benchmark_name} ${func_idx} ${repl_no}
argv = sys.argv[1:]
if argv[0].find("pes") < 0:
    raise ValueError("benchmark is not pes!")
problem = identify_problem(argv, bucket)

# Transform data to (0,1)^d space
lower_bounds = problem.obj_func_min._search_domain[:, 0]
upper_bounds = problem.obj_func_min._search_domain[:, 1]
transformed_data = HistoricalData(problem.obj_func_min.getDim() + 1)
for pt, val, var in zip(problem.hist_data.points_sampled,
                        problem.hist_data.points_sampled_value,
                        problem.hist_data.points_sampled_noise_variance):
    transformed_data.append_sample_points([
        [
            numpy.concatenate(
                ([pt[0]], scale_forward(pt[1:], lower_bounds, upper_bounds))),
            val, var
        ],
    ])


# entropy search begins
def noise_func(IS, x):
    return problem.obj_func_min.noise_and_cost_func(IS, x)[0]
Example #33
                                              pre_mult=1.0,
                                              max_relative_change=0.7,
                                              tolerance=1.0e-3)

cpp_sgd_params_ps = cppGradientDescentParameters(num_multistarts=1,
                                                 max_num_steps=12,
                                                 max_num_restarts=1,
                                                 num_steps_averaged=3,
                                                 gamma=0.7,
                                                 pre_mult=0.01,
                                                 max_relative_change=0.01,
                                                 tolerance=1.0e-5)

if obj_func_name == "GP":
    gp_grad_info_dict = pickle.load(open('random_gp_grad_1d', 'rb'))
    hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 1)
    hist_data_grad.append_historical_data(gp_grad_info_dict['points'],
                                          gp_grad_info_dict['values'],
                                          gp_grad_info_dict['vars'])
    objective_func = synthetic_functions.RandomGP(
        gp_grad_info_dict['dim'], gp_grad_info_dict['hyper_params'],
        hist_data_grad)
    hyper_params = gp_grad_info_dict['hyper_params']
    init_pts = [[-1.5], [-1.0], [1.0], [1.5]]
    ymax = 2
elif obj_func_name == "GP_wavy":
    gp_grad_info_dict = pickle.load(open('random_gp_1d_wavy', 'rb'))
    hist_data_grad = HistoricalData(gp_grad_info_dict['dim'], 0)
    hist_data_grad.append_historical_data(gp_grad_info_dict['points'],
                                          gp_grad_info_dict['values'],
                                          gp_grad_info_dict['vars'])