def evaluate_at_point_list(
        self,
        points_to_evaluate,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
    ):
        """Evaluate Expected Improvement at every entry of ``points_to_evaluate`` by calling C++.

        .. Note:: Only ``points_to_evaluate`` supplies the candidate points; ``self._points_to_sample``
            is neither read nor modified by this method.

        Gradient-based optimization is generally preferred, but when it fails to converge a brute-force
        evaluation like this may be the only "robust" option. It is also handy for plotting or debugging.

        :param points_to_evaluate: candidate point sets at which to compute EI
        :type points_to_evaluate: 3-D array of float64; its shape is unpacked as
          (num_to_evaluate, num_to_sample, last_axis), where last_axis is presumably ``self.dim`` (TODO confirm)
        :param randomness: RNGs handed to C++. If None: ``self._randomness`` is used when ``max_num_threads == 1``,
          otherwise a new container with ``max_num_threads`` sources is created and seeded
        :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
        :param max_num_threads: maximum number of threads to use, >= 1
        :type max_num_threads: int > 0
        :param status: (output) status messages from C++; a fresh dict is used when None
        :type status: dict
        :return: EI values returned by C++, wrapped in a numpy array
        :rtype: array of float64 (presumably one entry per ``points_to_evaluate.shape[0]``)

        """
        rng_sources = randomness
        if rng_sources is None and max_num_threads == 1:
            # Single-threaded: the evaluator's own RNG container is sufficient.
            rng_sources = self._randomness
        elif rng_sources is None:
            # Multi-threaded: build a container with one source per thread.
            rng_sources = C_GP.RandomnessSourceContainer(max_num_threads)
            # Set seeds based on less repeatable factors (e.g., time)
            rng_sources.SetRandomizedUniformGeneratorSeed(0)
            rng_sources.SetRandomizedNormalRNGSeed(0)

        # C++ requires an initialized dict to write its status messages into.
        status_out = {} if status is None else status

        # num_to_sample is taken from the input itself; it need not match the evaluator's own
        # num_to_sample because points_to_evaluate overrides the evaluator's stored points.
        num_to_evaluate, num_to_sample, _last_axis = points_to_evaluate.shape

        return numpy.array(
            C_GP.evaluate_EI_at_point_list(
                self._gaussian_process._gaussian_process,
                cpp_utils.cppify(points_to_evaluate),
                cpp_utils.cppify(self._points_being_sampled),
                num_to_evaluate,
                num_to_sample,
                self.num_being_sampled,
                self._best_so_far,
                self._num_mc_iterations,
                max_num_threads,
                rng_sources,
                status_out,
            )
        )
    def __init__(self, hyperparameters_list, noise_variance_list,
                 historical_data, derivatives):
        """Construct a wrapper around a C++ ``C_GP.GaussianProcessMCMC`` object.

        Every input is deep-copied before being stored, and the C++ object keeps its own copy of the
        data it is given.

        :param hyperparameters_list: hyperparameter samples; ``shape[0]`` is stored as the number of MCMC samples
        :type hyperparameters_list: object exposing ``.shape`` (presumably a 2-D array of float64; TODO confirm)
        :param noise_variance_list: noise variances forwarded to C++ (layout not visible here; TODO confirm)
        :type noise_variance_list: presumably array of float64
        :param historical_data: object specifying the already-sampled points and the objective values at those points
        :type historical_data: :class:`moe.optimal_learning.python.data_containers.HistoricalData` object
        :param derivatives: derivative specification forwarded to C++; the length of its cppified form is stored
          as the number of derivatives
        :type derivatives: presumably a sequence of int (TODO confirm)

        """
        # Private copies so later changes made by the caller cannot leak into this object.
        self._hyperparameters_list = copy.deepcopy(hyperparameters_list)
        self._noise_variance_list = copy.deepcopy(noise_variance_list)
        self._historical_data = copy.deepcopy(historical_data)
        self._derivatives = copy.deepcopy(derivatives)

        # Sizes the C++ constructor needs alongside the flattened data.
        self._num_mcmc = hyperparameters_list.shape[0]
        self._num_derivatives = len(cpp_utils.cppify(self._derivatives))

        history = self._historical_data
        # C++ will maintain its own copy of the contents of hyperparameters and historical_data
        self._gaussian_process_mcmc = C_GP.GaussianProcessMCMC(
            cpp_utils.cppify(self._hyperparameters_list),
            cpp_utils.cppify(self._noise_variance_list),
            cpp_utils.cppify(history.points_sampled),
            cpp_utils.cppify(history.points_sampled_value),
            cpp_utils.cppify(self._derivatives),
            self._num_mcmc,
            self._num_derivatives,
            history.dim,
            history.num_sampled,
        )
Exemple #3
0
    def compute_log_likelihood(self, hyps):
        r"""Compute the log prior plus the objective_type log likelihood measure at ``hyps``.

        ``hyps`` lives on a log scale: the first ``self.dim + 1`` entries are the covariance
        hyperparameters and the remaining entries are the noise terms.

        .. Note:: When ``self.noisy`` is false the noise entries of ``hyps`` are overwritten IN PLACE with
            ``log(1e-8)``, so the caller's array is modified.

        .. Note:: When ``self.prior`` is None a constant ``1`` is added to the log likelihood.
            NOTE(review): confirm this offset is intended (rather than 0).

        :param hyps: log-scale hyperparameters (covariance entries followed by noise entries)
        :type hyps: array of float64
        :return: ``-inf`` if any entry of ``hyps`` lies outside [-20, 20] or the prior evaluates to ``-inf``;
          otherwise prior term + log likelihood (``LL(y | X, \theta)``)
        :rtype: float64

        """
        # Bound the hyperparameter space to keep things sane (all hyperparameters are on a log scale).
        if numpy.any(numpy.logical_or(hyps < -20, hyps > 20)):
            return -numpy.inf

        first_noise_index = self.dim + 1
        if not self.noisy:
            # Noise-free model: pin each of the (1 + num_derivatives) noise entries to log(1e-8).
            hyps[first_noise_index:] = numpy.log((1 + self._num_derivatives) * [1.e-8])

        # The prior is evaluated on the log-scale values, i.e. before exponentiation.
        log_prior = 1 if self.prior is None else self.prior.lnprob(hyps)
        if log_prior == -numpy.inf:
            return -numpy.inf

        # C++ works with the hyperparameters on their natural scale.
        natural_hyps = numpy.exp(hyps)
        covariance_part = natural_hyps[:first_noise_index]
        noise_part = natural_hyps[first_noise_index:]

        return log_prior + C_GP.compute_log_likelihood(
            cpp_utils.cppify(self._points_sampled),
            cpp_utils.cppify(self._points_sampled_value),
            self.dim,
            self._num_sampled,
            self.objective_type,
            cpp_utils.cppify_hyperparameters(covariance_part),
            cpp_utils.cppify(self._derivatives),
            self._num_derivatives,
            cpp_utils.cppify(noise_part),
        )
Exemple #4
0
    def compute_grad_posterior_mean(self, force_monte_carlo=False):
        r"""Compute the gradient of the GP posterior mean at ``self._points_to_sample`` by calling C++.

        The wrapped C++ Gaussian process, ``self._num_fidelity`` and the cppified ``self._points_to_sample``
        are passed to ``C_GP.compute_grad_posterior_mean``; the result is reshaped with ``cpp_utils.uncppify``.

        :param force_monte_carlo: not read by this method
        :type force_monte_carlo: boolean
        :return: gradient of the posterior mean as returned by C++
        :rtype: array of float64 with shape (1, self.dim - self._num_fidelity)

        """
        # The C++ result is reshaped to a single row with one entry per non-fidelity dimension.
        output_shape = (1, self.dim - self._num_fidelity)
        flat_gradient = C_GP.compute_grad_posterior_mean(
            self._gaussian_process._gaussian_process,
            self._num_fidelity,
            cpp_utils.cppify(self._points_to_sample),
        )
        return cpp_utils.uncppify(flat_gradient, output_shape)
Exemple #5
0
def constant_liar_expected_improvement_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    lie_value,
    lie_noise_variance=0.0,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    """Heuristically solve q,0-EI with the Constant Liar policy.

    Thin wrapper: builds a ``C_GP.ConstantLiarEstimationPolicy`` and hands it to
    ``_heuristic_expected_improvement_optimization()``; see that function's docs for how the heuristic
    optimization works. This optimizer only uses the analytic 1,0-EI, so it is fast.

    .. Note:: comments copied from ConstantLiarEstimationPolicy in gpp_heuristic_expected_improvement_optimization.hpp.

    The "Constant Liar" objective function estimation policy is the simplest: it always returns the same
    value (Ginsbourger 2008), called the "lie". A noise variance can also be associated with the lie value.

    In Ginsbourger's work, the most common lie values have been the min and max of all previously observed
    objective function values (i.e., min, max of GP.points_sampled_value); the mean has also been considered.
    He also points out that larger lie values (e.g., max of prior measurements) make methods like
    ComputeEstimatedSetOfPointsToSample() more explorative, and vice versa.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param lie_value: the "constant lie" that this estimator should return
    :type lie_value: float64
    :param lie_noise_variance: the noise_variance to associate to the lie_value (MUST be >= 0.0)
    :type lie_noise_variance: float64
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    constant_liar_policy = C_GP.ConstantLiarEstimationPolicy(lie_value, lie_noise_variance)

    # Optional settings are forwarded untouched to the generic heuristic optimizer.
    passthrough_options = {
        "randomness": randomness,
        "max_num_threads": max_num_threads,
        "status": status,
    }
    return _heuristic_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        constant_liar_policy,
        **passthrough_options
    )
Exemple #6
0
def restarted_hyperparameter_optimization(
    log_likelihood_optimizer,
    status=None,
):
    """Run the C++ ``restarted_hyperparameter_optimization`` routine and return its result as an array.

    The optimizer parameters, the log10 of the domain bounds, and the objective function's sampled
    points, values, dimension, covariance hyperparameters, noise variance and derivative data are
    all forwarded to C++.

    :param log_likelihood_optimizer: object providing ``optimizer_parameters``, ``domain`` and ``objective_function``
    :type log_likelihood_optimizer: presumably a cpp_wrappers.optimization.*Optimizer object (TODO confirm)
    :param status: (output) status messages from C++; a fresh dict is used when None
    :type status: dict
    :return: hyperparameters returned by the C++ optimizer
    :rtype: array of float64

    """
    # C++ requires an initialized dict to write its status messages into.
    status_out = {} if status is None else status

    objective = log_likelihood_optimizer.objective_function

    # C++ expects the domain in log10 space and in list form
    log10_bounds = numpy.log10(log_likelihood_optimizer.domain._domain_bounds)

    optimized = C_GP.restarted_hyperparameter_optimization(
        log_likelihood_optimizer.optimizer_parameters,
        cpp_utils.cppify(log10_bounds),
        cpp_utils.cppify(objective._points_sampled),
        cpp_utils.cppify(objective._points_sampled_value),
        objective.dim,
        objective._num_sampled,
        cpp_utils.cppify_hyperparameters(objective.cov_hyperparameters),
        cpp_utils.cppify(objective.noise_variance),
        cpp_utils.cppify(objective.derivatives),
        objective.num_derivatives,
        status_out,
    )
    return numpy.array(optimized)
    def evaluate_at_point_list(
            self,
            points_to_evaluate,
            randomness=None,
            max_num_threads=DEFAULT_MAX_NUM_THREADS,
            status=None,
    ):
        """Evaluate Expected Improvement at every entry of ``points_to_evaluate`` by calling C++.

        .. Note:: Only ``points_to_evaluate`` supplies the candidate points; ``self._points_to_sample``
            is neither read nor modified by this method.

        Gradient-based optimization is generally preferred, but when it fails to converge a brute-force
        evaluation like this may be the only "robust" option. It is also handy for plotting or debugging.

        :param points_to_evaluate: candidate point sets at which to compute EI
        :type points_to_evaluate: 3-D array of float64; its shape is unpacked as
          (num_to_evaluate, num_to_sample, last_axis), where last_axis is presumably ``self.dim`` (TODO confirm)
        :param randomness: RNGs handed to C++. If None: ``self._randomness`` is used when ``max_num_threads == 1``,
          otherwise a new container with ``max_num_threads`` sources is created and seeded
        :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
        :param max_num_threads: maximum number of threads to use, >= 1
        :type max_num_threads: int > 0
        :param status: (output) status messages from C++; a fresh dict is used when None
        :type status: dict
        :return: EI values returned by C++, wrapped in a numpy array
        :rtype: array of float64 (presumably one entry per ``points_to_evaluate.shape[0]``)

        """
        rng_sources = randomness
        if rng_sources is None and max_num_threads == 1:
            # Single-threaded: the evaluator's own RNG container is sufficient.
            rng_sources = self._randomness
        elif rng_sources is None:
            # Multi-threaded: build a container with one source per thread.
            rng_sources = C_GP.RandomnessSourceContainer(max_num_threads)
            # Set seeds based on less repeatable factors (e.g., time)
            rng_sources.SetRandomizedUniformGeneratorSeed(0)
            rng_sources.SetRandomizedNormalRNGSeed(0)

        # C++ requires an initialized dict to write its status messages into.
        status_out = {} if status is None else status

        # num_to_sample is taken from the input itself; it need not match the evaluator's own
        # num_to_sample because points_to_evaluate overrides the evaluator's stored points.
        num_to_evaluate, num_to_sample, _last_axis = points_to_evaluate.shape

        return numpy.array(
            C_GP.evaluate_EI_at_point_list(
                self._gaussian_process._gaussian_process,
                cpp_utils.cppify(points_to_evaluate),
                cpp_utils.cppify(self._points_being_sampled),
                num_to_evaluate,
                num_to_sample,
                self.num_being_sampled,
                self._best_so_far,
                self._num_mc_iterations,
                max_num_threads,
                rng_sources,
                status_out,
            )
        )
Exemple #8
0
    def __init__(self,
                 gaussian_process_mcmc,
                 num_to_sample,
                 points_to_sample=None,
                 points_being_sampled=None,
                 num_mc_iterations=DEFAULT_EXPECTED_IMPROVEMENT_MC_ITERATIONS,
                 randomness=None):
        """Construct an ExpectedImprovement object (over an MCMC Gaussian process) that calls into C++.

        :param gaussian_process_mcmc: MCMC Gaussian process wrapper; its ``_historical_data``, ``_num_mcmc`` and
          ``dim`` attributes are read here
        :type gaussian_process_mcmc: presumably a cpp_wrappers GaussianProcessMCMC object (TODO confirm)
        :param num_to_sample: number of points to sample; used to size the default ``current_point``
        :type num_to_sample: int >= 1
        :param points_to_sample: points at which to evaluate EI and/or its gradient (i.e., "q" in q,p-EI);
          stored as ``self.current_point`` WITHOUT copying. When None, an all-zeros array is used instead
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param points_being_sampled: points being sampled in concurrent experiments (i.e., "p" in q,p-EI); copied
        :type points_being_sampled: array of float64 with shape (num_being_sampled, dim)
        :param num_mc_iterations: number of monte-carlo iterations to use (when monte-carlo integration is used to compute EI)
        :type num_mc_iterations: int > 0
        :param randomness: RNGs used by C++ as the source of normal random numbers when monte-carlo is used;
          when None a single-thread container is created and seeded
        :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())

        """
        self._num_mc_iterations = num_mc_iterations
        self._gaussian_process_mcmc = gaussian_process_mcmc
        self._num_to_sample = num_to_sample

        # Best value so far: the minimum of column 0 of the observed values, or the largest float64
        # when nothing has been observed. The same value is repeated once per MCMC sample.
        observed_values = gaussian_process_mcmc._historical_data.points_sampled_value
        if observed_values.size > 0:
            best_observed = numpy.amin(observed_values[:, 0])
        else:
            best_observed = numpy.finfo(numpy.float64).max
        self._best_so_far_list = gaussian_process_mcmc._num_mcmc * [best_observed]

        self._points_being_sampled = (
            numpy.array([]) if points_being_sampled is None else numpy.copy(points_being_sampled)
        )

        if points_to_sample is not None:
            self.current_point = points_to_sample
        else:
            # set an arbitrary point
            self.current_point = numpy.zeros((self._num_to_sample, gaussian_process_mcmc.dim))

        if randomness is not None:
            self._randomness = randomness
        else:
            # create randomness for only 1 thread
            self._randomness = C_GP.RandomnessSourceContainer(1)
            # Set seed based on less repeatable factors (e.g., time)
            self._randomness.SetRandomizedUniformGeneratorSeed(0)
            self._randomness.SetRandomizedNormalRNGSeed(0)

        self.objective_type = None  # Not used for EI, but the field is expected in C++
    def __init__(
        self,
        gaussian_process,
        discrete_pts,
        noise,
        points_to_sample=None,
        points_being_sampled=None,
        num_mc_iterations=DEFAULT_EXPECTED_IMPROVEMENT_MC_ITERATIONS,
        randomness=None,
    ):
        """Construct a KnowledgeGradient object that supports q,p-KG.

        :param gaussian_process: GaussianProcess describing the objective; its
          ``compute_mean_of_additional_points`` method and ``dim`` attribute are used here
        :type gaussian_process: interfaces.gaussian_process_interface.GaussianProcessInterface subclass
        :param discrete_pts: a discrete set of points to approximate the KG; copied
        :type discrete_pts: array of float64 with shape (num_pts, dim)
        :param noise: measurement noise
        :type noise: float64
        :param points_to_sample: points at which to evaluate KG and/or its gradient to check their value in future
          experiments (i.e., "q" in q,p-KG); stored WITHOUT copying. When None, a single all-zeros point is used
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param points_being_sampled: points being sampled in concurrent experiments (i.e., "p" in q,p-KG); copied
        :type points_being_sampled: array of float64 with shape (num_being_sampled, dim)
        :param num_mc_iterations: number of monte-carlo iterations to use (when monte-carlo integration is used to compute KG)
        :type num_mc_iterations: int > 0
        :param randomness: random source(s) stored as ``self._randomness``; when None a single-thread container
          is created and seeded
        :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
        """
        self._num_mc_iterations = num_mc_iterations
        self._gaussian_process = gaussian_process
        self._discrete_pts = numpy.copy(discrete_pts)
        self._noise = noise
        # Posterior mean at the discrete points; its minimum serves as the current best value.
        self._mu_star = self._gaussian_process.compute_mean_of_additional_points(
            self._discrete_pts)
        self._best_so_far = numpy.amin(self._mu_star)

        if points_being_sampled is None:
            self._points_being_sampled = numpy.array([])
        else:
            self._points_being_sampled = numpy.copy(points_being_sampled)

        if points_to_sample is None:
            # Default to one arbitrary (all-zeros) point.
            self._points_to_sample = numpy.zeros(
                (1, self._gaussian_process.dim))
        else:
            self._points_to_sample = points_to_sample

        # Read the count from the stored array, not from the argument: the argument may be None,
        # in which case ``points_to_sample.shape`` would raise AttributeError.
        self._num_to_sample = self._points_to_sample.shape[0]

        if randomness is None:
            self._randomness = C_GP.RandomnessSourceContainer(
                1)  # create randomness for only 1 thread
            # Set seed based on less repeatable factors (e.g., time)
            self._randomness.SetRandomizedUniformGeneratorSeed(0)
            self._randomness.SetRandomizedNormalRNGSeed(0)
        else:
            self._randomness = randomness

        self.objective_type = None  # Not used for KG, but the field is expected in C++
    def compute_knowledge_gradient_mcmc(self, force_monte_carlo=False):
        r"""Compute the knowledge gradient at ``points_to_sample``, with ``points_being_sampled`` concurrent points.

        ``points_to_sample`` is the "q" and ``points_being_sampled`` is the "p" in q,p-KG.

        This is a thin wrapper around ``C_GP.compute_knowledge_gradient_mcmc``. It forwards the wrapped C++
        MCMC Gaussian process, the number of fidelity dimensions, the inner optimizer's parameters and domain
        bounds, the discrete point sets, the points to sample / being sampled with their counts, the number of
        monte-carlo iterations, the per-sample best-so-far values and the RNG container.

        :param force_monte_carlo: not read by this method (it is not forwarded to C++)
        :type force_monte_carlo: boolean
        :return: the knowledge gradient from sampling ``points_to_sample`` with ``points_being_sampled`` concurrent experiments
        :rtype: float64

        """
        inner_optimizer = self._inner_optimizer
        return C_GP.compute_knowledge_gradient_mcmc(
            self._gaussian_process_mcmc._gaussian_process_mcmc,
            self._num_fidelity,
            inner_optimizer.optimizer_parameters,
            cpp_utils.cppify(inner_optimizer.domain.domain_bounds),
            cpp_utils.cppify(self._discrete_pts_list),
            cpp_utils.cppify(self._points_to_sample),
            cpp_utils.cppify(self._points_being_sampled),
            self.discrete,
            self.num_to_sample,
            self.num_being_sampled,
            self._num_mc_iterations,
            cpp_utils.cppify(self._best_so_far_list),
            self._randomness,
        )
    def compute_expected_improvement(self, force_monte_carlo=False):
        r"""Compute the expected improvement at ``points_to_sample``, with ``points_being_sampled`` concurrent points.

        .. Note:: These comments were adapted from
          :meth:`moe.optimal_learning.python.interfaces.expected_improvement_interface.ExpectedImprovementInterface.compute_expected_improvement`

        ``points_to_sample`` is the "q" and ``points_being_sampled`` is the "p" in q,p-EI.

        Computes ``EI(Xs) = E_n[[f^*_n(X) - min(f(Xs_1),...,f(Xs_m))]^+]``, where ``Xs`` is the union of
        ``points_to_sample`` and ``points_being_sampled`` and ``X`` are the already sampled points. ``^+`` means
        the expression evaluates to 0 if it is negative. ``f^*(X)`` is the MINIMUM over all known function
        evaluations (``points_sampled_value``), whereas ``f(Xs)`` are *GP-predicted* function evaluations.

        In general EI is difficult to evaluate, so Monte-Carlo simulation is used unless a faster (e.g.,
        analytic) technique is available. The MC approach repeatedly draws ``y = \mu + Lw`` at the union of
        points, where ``\mu`` is the GP-mean, ``L`` is the ``chol_factor(GP-variance)`` and ``w`` is a vector of
        ``num_union`` draws from N(0, 1); then ``improvement_per_step = max(max(best_so_far - y), 0.0)``,
        averaged over many draws. The inner ``max`` means only the smallest component of ``y`` contributes.

        :param force_monte_carlo: whether to force monte carlo evaluation (vs using fast/accurate analytic eval when possible)
        :type force_monte_carlo: boolean
        :return: the expected improvement from sampling ``points_to_sample`` with ``points_being_sampled`` concurrent experiments
        :rtype: float64

        """
        expected_improvement = C_GP.compute_expected_improvement(
            self._gaussian_process._gaussian_process,
            cpp_utils.cppify(self._points_to_sample),
            cpp_utils.cppify(self._points_being_sampled),
            self.num_to_sample,
            self.num_being_sampled,
            self._num_mc_iterations,
            self._best_so_far,
            force_monte_carlo,
            self._randomness,
        )
        return expected_improvement
Exemple #12
0
def evaluate_log_likelihood_at_hyperparameter_list(
    log_likelihood_evaluator,
    hyperparameters_to_evaluate,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    """Evaluate the chosen log likelihood measure at each set of hyperparameters in a list.

    Newton or gradient descent is generally preferred, but when they fail to converge this brute-force
    evaluation may be the only "robust" option. It is also useful for plotting or debugging.

    Calls into evaluate_log_likelihood_at_hyperparameter_list() in cpp/GPP_python_model_selection.cpp.

    :param log_likelihood_evaluator: object specifying which log likelihood measure to evaluate
    :type log_likelihood_evaluator: cpp_wrappers.log_likelihood.LogLikelihood
    :param hyperparameters_to_evaluate: the hyperparameters at which to compute the specified log likelihood
    :type hyperparameters_to_evaluate: array of float64 with shape (num_to_eval, log_likelihood_evaluator.num_hyperparameters)
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++; a fresh dict is used when None
    :type status: dict
    :return: log likelihood value at each specified set of hyperparameters
    :rtype: array of float64 with shape (hyperparameters_to_evaluate.shape[0])

    """
    # C++ requires an initialized dict to write its status messages into.
    status_out = {} if status is None else status

    evaluator = log_likelihood_evaluator
    # Calling evaluator.compute_log_likelihood() in a Python loop would also work, but the loop is
    # done in C++ where it can be multithreaded.
    return numpy.array(
        C_GP.evaluate_log_likelihood_at_hyperparameter_list(
            cpp_utils.cppify(hyperparameters_to_evaluate),
            cpp_utils.cppify(evaluator._points_sampled),
            cpp_utils.cppify(evaluator._points_sampled_value),
            evaluator.dim,
            evaluator._num_sampled,
            evaluator.objective_type,
            cpp_utils.cppify_hyperparameters(evaluator.cov_hyperparameters),
            cpp_utils.cppify(evaluator.noise_variance),
            cpp_utils.cppify(evaluator.derivatives),
            evaluator.num_derivatives,
            hyperparameters_to_evaluate.shape[0],
            max_num_threads,
            status_out,
        )
    )
Exemple #13
0
    def compute_log_likelihood(self):
        r"""Compute the objective_type measure at this object's current hyperparameters.

        The sampled points, their values and their noise variances are converted with ``cpp_utils`` and
        passed to ``C_GP.compute_log_likelihood`` together with ``dim``, the number of sampled points,
        ``objective_type`` and the hyperparameters.

        :return: value of log_likelihood evaluated at hyperparameters (``LL(y | X, \theta)``)
        :rtype: float64

        """
        cpp_points = cpp_utils.cppify(self._points_sampled)
        cpp_values = cpp_utils.cppify(self._points_sampled_value)
        log_likelihood = C_GP.compute_log_likelihood(
            cpp_points,
            cpp_values,
            self.dim,
            self._num_sampled,
            self.objective_type,
            cpp_utils.cppify_hyperparameters(self.hyperparameters),
            cpp_utils.cppify(self._points_sampled_noise_variance),
        )
        return log_likelihood
Exemple #14
0
    def compute_log_likelihood(self):
        r"""Compute the objective_type measure at this object's current hyperparameters.

        The sampled points, their values and their noise variances are converted with ``cpp_utils`` and
        passed to ``C_GP.compute_log_likelihood`` together with ``dim``, the number of sampled points,
        ``objective_type`` and the hyperparameters.

        :return: value of log_likelihood evaluated at hyperparameters (``LL(y | X, \theta)``)
        :rtype: float64

        """
        cpp_points = cpp_utils.cppify(self._points_sampled)
        cpp_values = cpp_utils.cppify(self._points_sampled_value)
        log_likelihood = C_GP.compute_log_likelihood(
            cpp_points,
            cpp_values,
            self.dim,
            self._num_sampled,
            self.objective_type,
            cpp_utils.cppify_hyperparameters(self.hyperparameters),
            cpp_utils.cppify(self._points_sampled_noise_variance),
        )
        return log_likelihood
Exemple #15
0
    def compute_grad_log_likelihood(self):
        r"""Compute the gradient (wrt hyperparameters) of the objective_type measure at the current hyperparameters.

        Thin wrapper around ``C_GP.compute_hyperparameter_grad_log_likelihood``; the C++ result is
        converted to a numpy array.

        :return: grad_log_likelihood: i-th entry is ``\pderiv{LL(y | X, \theta)}{\theta_i}``
        :rtype: array of float64 with shape (num_hyperparameters)

        """
        cpp_points = cpp_utils.cppify(self._points_sampled)
        cpp_values = cpp_utils.cppify(self._points_sampled_value)
        return numpy.array(
            C_GP.compute_hyperparameter_grad_log_likelihood(
                cpp_points,
                cpp_values,
                self.dim,
                self._num_sampled,
                self.objective_type,
                cpp_utils.cppify_hyperparameters(self.hyperparameters),
                cpp_utils.cppify(self._points_sampled_noise_variance),
            )
        )
Exemple #16
0
    def compute_grad_log_likelihood(self):
        r"""Compute the gradient (wrt hyperparameters) of the objective_type measure at the current hyperparameters.

        Thin wrapper around ``C_GP.compute_hyperparameter_grad_log_likelihood``; the C++ result is
        converted to a numpy array.

        :return: grad_log_likelihood: i-th entry is ``\pderiv{LL(y | X, \theta)}{\theta_i}``
        :rtype: array of float64 with shape (num_hyperparameters)

        """
        cpp_points = cpp_utils.cppify(self._points_sampled)
        cpp_values = cpp_utils.cppify(self._points_sampled_value)
        return numpy.array(
            C_GP.compute_hyperparameter_grad_log_likelihood(
                cpp_points,
                cpp_values,
                self.dim,
                self._num_sampled,
                self.objective_type,
                cpp_utils.cppify_hyperparameters(self.hyperparameters),
                cpp_utils.cppify(self._points_sampled_noise_variance),
            )
        )
Exemple #17
0
def evaluate_log_likelihood_at_hyperparameter_list(
        log_likelihood_evaluator,
        hyperparameters_to_evaluate,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Evaluate the selected log likelihood measure once per row of ``hyperparameters_to_evaluate``.

    Newton or gradient descent is usually the better way to find good hyperparameters; when those fail to converge,
    brute-force evaluation like this may be the only "robust" fallback. It is also handy for plotting or debugging,
    whenever a batch of log likelihood values is wanted.

    The loop over hyperparameter sets is done by evaluate_log_likelihood_at_hyperparameter_list() in
    cpp/GPP_python_model_selection.cpp rather than in Python, so that it can be multithreaded.

    :param log_likelihood_evaluator: object specifying which log likelihood measure to evaluate
    :type log_likelihood_evaluator: cpp_wrappers.log_likelihood.LogLikelihood
    :param hyperparameters_to_evaluate: the hyperparameters at which to compute the specified log likelihood
    :type hyperparameters_to_evaluate: array of float64 with shape (num_to_eval, log_likelihood_evaluator.num_hyperparameters)
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: log likelihood value at each specified set of hyperparameters
    :rtype: array of float64 with shape (hyperparameters_to_evaluate.shape[0])

    """
    # C++ needs a real dict to report into, even if the caller did not ask for status output.
    if status is None:
        status = {}

    evaluator = log_likelihood_evaluator
    evaluate_in_cpp = C_GP.evaluate_log_likelihood_at_hyperparameter_list
    # Everything is passed positionally; the ordering below must not change.
    values_from_cpp = evaluate_in_cpp(
        cpp_utils.cppify(hyperparameters_to_evaluate),
        cpp_utils.cppify(evaluator._points_sampled),
        cpp_utils.cppify(evaluator._points_sampled_value),
        evaluator.dim,
        evaluator._num_sampled,
        evaluator.objective_type,
        cpp_utils.cppify_hyperparameters(evaluator.hyperparameters),
        cpp_utils.cppify(evaluator._points_sampled_noise_variance),
        hyperparameters_to_evaluate.shape[0],  # number of hyperparameter sets (rows) to evaluate
        max_num_threads,
        status,
    )
    return numpy.array(values_from_cpp)
    def compute_grad_expected_improvement(self, force_monte_carlo=False):
        r"""Return the gradient of expected improvement with respect to ``points_to_sample``.

        The gradient is evaluated at the current ``points_to_sample`` (the "q" in q,p-EI) while treating
        ``points_being_sampled`` (the "p") as concurrent, still-running experiments.

        .. Note:: These comments were adapted from
          :meth:`moe.optimal_learning.python.interfaces.expected_improvement_interface.ExpectedImprovementInterface.compute_grad_expected_improvement`

        Gradients of EI are, in general, complex and difficult to evaluate, so Monte-Carlo simulation is used to
        approximate them; faster (e.g., analytic) techniques are preferred whenever they are available.

        The MC estimate of grad EI parallels the MC estimate of EI (see compute_expected_improvement).
        ``y = \mu + Lw`` is differentiated wrt ``points_to_sample``, so only the gradients of ``\mu`` and ``L``
        contribute. EI uses ``improvement_per_step = max(max(best_so_far - y), 0.0)``, where only the smallest
        component of ``y`` can contribute (and only if the resulting improvement is > 0.0). Calling that component's
        index ``winner``, grad EI accumulates only the gradient terms attributable to the ``winner``-th component of ``y``.

        :param force_monte_carlo: whether to force monte carlo evaluation (vs using fast/accurate analytic eval when possible)
        :type force_monte_carlo: boolean
        :return: gradient of EI, ``\pderiv{EI(Xq \cup Xp)}{Xq_{i,d}}`` where ``Xq`` is ``points_to_sample``
          and ``Xp`` is ``points_being_sampled`` (grad EI from sampling ``points_to_sample`` with
          ``points_being_sampled`` concurrent experiments wrt each dimension of the points in ``points_to_sample``)
        :rtype: array of float64 with shape (num_to_sample, dim)

        """
        compute_in_cpp = C_GP.compute_grad_expected_improvement
        # Positional call into C++: the argument order below is significant.
        cpp_gradient = compute_in_cpp(
            self._gaussian_process._gaussian_process,
            cpp_utils.cppify(self._points_to_sample),
            cpp_utils.cppify(self._points_being_sampled),
            self.num_to_sample,
            self.num_being_sampled,
            self._num_mc_iterations,
            self._best_so_far,
            force_monte_carlo,
            self._randomness,
        )
        # Hand the result back in the documented (num_to_sample, dim) layout.
        output_shape = (self.num_to_sample, self.dim)
        return cpp_utils.uncppify(cpp_gradient, output_shape)
    def __init__(
            self,
            gaussian_process_list,
            num_fidelity,
            points_to_sample=None,
            randomness=None,
    ):
        """Store the GP models, the point(s) to evaluate at, and the randomness source.

        :param gaussian_process_list: GP models; the ``dim`` of the first entry sizes the default ``points_to_sample``
        :type gaussian_process_list: indexable sequence of objects exposing ``dim``
        :param num_fidelity: stored unchanged as ``self._num_fidelity``
          (NOTE(review): presumably the number of fidelity dimensions -- confirm against the users of this field)
        :type num_fidelity: int (assumed -- TODO confirm)
        :param points_to_sample: point(s) to store as ``self._points_to_sample``; a copy is kept.
          When omitted, a single all-zero point of dimension ``gaussian_process_list[0].dim`` is used.
        :type points_to_sample: array of float64 (NOTE(review): presumably shape (num_to_sample, dim) -- confirm)
        :param randomness: RNGs handed to C++; when omitted, a single-thread container is created and seeded here
        :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())

        """
        self._gaussian_process_list = gaussian_process_list
        self._num_fidelity = num_fidelity

        if points_to_sample is None:
            self._points_to_sample = numpy.zeros((1, self._gaussian_process_list[0].dim))
        else:
            # A caller-supplied value must be stored as well; otherwise ``_points_to_sample`` is
            # never defined on this code path. Copy so later edits by the caller do not leak in.
            self._points_to_sample = numpy.copy(points_to_sample)

        if randomness is None:
            self._randomness = C_GP.RandomnessSourceContainer(1)  # create randomness for only 1 thread
            # Set seed based on less repeatable factors (e.g,. time)
            self._randomness.SetRandomizedUniformGeneratorSeed(0)
            self._randomness.SetRandomizedNormalRNGSeed(0)
        else:
            self._randomness = randomness

        self.objective_type = None  # Not used for KG, but the field is expected in C++
Exemple #20
0
def multistart_hyperparameter_optimization(
        log_likelihood_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Find the hyperparameters that maximize the chosen log likelihood measure of model fit within a domain.

    The fit is measured over the historical data held by ``log_likelihood_optimizer.objective_function``; the
    search itself runs in C++ (``C_GP.multistart_hyperparameter_optimization``).

    .. Note:: The following comments are copied to
      :mod:`moe.optimal_learning.python.python_version.log_likelihood.multistart_hyperparameter_optimization`.

    See :class:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.GaussianProcessLogMarginalLikelihood` and
    :class:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`
    for an overview of some example log likelihood-like measures.

    Available optimizers: null ('dumb' search), gradient descent, newton. Newton is the suggested optimizer.

    A 'dumb' search only evaluates the objective log likelihood measure at num_multistarts 'points' (hyperparameters)
    in the domain, drawn uniformly via latin hypercube sampling. Which technique is used, and the parameters
    controlling it, come from the optimizer parameters carried by ``log_likelihood_optimizer``
    (see :mod:`moe.optimal_learning.python.cpp_wrappers.optimization`).

    See gpp_python_common.cpp for C++ enum declarations laying out the options for objective and optimizer types.

    As a very (VERY!) rough guideline for sizing the domain and the number of multistarts: the coordinates of the
    initial guesses should not differ from the coordinates of the optima by more than about 1 order of magnitude.
    In other words, be wary of sets of initial guesses that cover the space too sparsely.

    C++ expects the domain in log10 space. This wrapper applies ``numpy.log10`` to
    ``log_likelihood_optimizer.domain._domain_bounds`` before the call, so those bounds must be strictly positive
    for the converted values to be finite.

    The solution is guaranteed to lie within the region specified by the domain; note that it may not be a
    true optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found!  In that case,
       the output will always be the first randomly chosen point. status will report failure.

    :param log_likelihood_optimizer: object that optimizes (e.g., gradient descent, newton) log likelihood over a domain
    :type log_likelihood_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``log_likelihood_optimizer``
      (UNUSED, data is in log_likelihood_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_optimizer.objective_function.num_hyperparameters)

    """
    if randomness is None:
        # No RNGs supplied: build a container sized for max_num_threads and seed both
        # generators from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # C++ requires an initialized dict to write its status messages into.
        status = {}

    # Convert the bounds to log10 space, which is what the C++ side works in.
    log10_bounds = numpy.log10(log_likelihood_optimizer.domain._domain_bounds)

    optimizer_parameters = log_likelihood_optimizer.optimizer_parameters
    objective = log_likelihood_optimizer.objective_function
    # Positional call into C++: the argument order below is significant.
    best_hyperparameters = C_GP.multistart_hyperparameter_optimization(
        optimizer_parameters,
        cpp_utils.cppify(log10_bounds),
        cpp_utils.cppify(objective._points_sampled),
        cpp_utils.cppify(objective._points_sampled_value),
        objective.dim,
        objective._num_sampled,
        cpp_utils.cppify_hyperparameters(objective.hyperparameters),
        cpp_utils.cppify(objective._points_sampled_noise_variance),
        max_num_threads,
        randomness,
        status,
    )
    return numpy.array(best_hyperparameters)
Exemple #21
0
def kriging_believer_expected_improvement_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    std_deviation_coef=0.0,
    kriging_noise_variance=0.0,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    """Heuristically solve q,0-EI with the Kriging Believer policy.

    This is a thin wrapper: it builds a ``C_GP.KrigingBelieverEstimationPolicy`` and forwards everything to
    _heuristic_expected_improvement_optimization(). See that function's docs for general notes on how the
    heuristic optimization works.

    Only the analytic 1,0-EI is used by this optimizer, so it is fast.

    .. Note:: comments adapted from KrigingBelieverEstimationPolicy in gpp_heuristic_expected_improvement_optimization.hpp.

    The "Kriging Believer" objective function estimation policy uses the Gaussian Process (i.e., the prior)
    to produce objective function estimates. The simplest version trusts the GP completely::

      estimate = GP.mean(point)

    which follows the usage in Ginsbourger 2008. To make the estimate reflect how confident the GP is in its
    prediction, users may also specify ``std_deviation_coef``::

      estimate = GP.mean(point) + std_deviation_coef * GP.variance(point)

    NOTE(review): the formula above is written with GP.variance while the parameter is described in units of
    GP std deviation -- confirm which one the C++ policy uses.

    The coefficient is signed; analogously to ConstantLiar, larger positive values are more
    explorative and larger negative values are more exploitive.

    A noise variance can also be associated with the lie value (``kriging_noise_variance``).

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param std_deviation_coef: the relative amount of bias (in units of GP std deviation) to introduce into the GP mean
    :type std_deviation_coef: float64
    :param kriging_noise_variance: the noise_variance to associate to each function value estimate (MUST be >= 0.0)
    :type kriging_noise_variance: float64
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    believer_policy = C_GP.KrigingBelieverEstimationPolicy(std_deviation_coef, kriging_noise_variance)
    # Optional settings are passed through untouched; the shared heuristic driver fills in defaults.
    passthrough_options = {
        'randomness': randomness,
        'max_num_threads': max_num_threads,
        'status': status,
    }
    return _heuristic_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        believer_policy,
        **passthrough_options
    )
def multistart_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        use_gpu=False,
        which_gpu=0,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Solve the q,p-EI problem: pick the best set of q points to sample CONCURRENTLY in future experiments.

    (Fast) analytic EI computations are used when ``points_being_sampled.size == 0 && num_to_sample == 1``.

    .. NOTE:: The following comments are adapted from gpp_math.hpp, ComputeOptimalPointsToSample().
      These comments are copied into
      :func:`moe.optimal_learning.python.python_version.expected_improvement.multistart_expected_improvement_optimization`

    This is the primary entry-point for EI optimization in the optimal_learning library. It aims for robustness
    by combining higher accuracy methods like gradient descent with fail-safes like random/grid search.

    The q,p-EI problem covers situations such as wanting to run 4 experiments at the same time, maximizing the EI
    across all 4 at once, while 2 other experiments are already ongoing (4,2-EI). Evaluating q,p-EI (and its
    gradient) for q > 1 or p > 1 requires monte-carlo iteration, so this method is usually very expensive.

    Unlike ComputeHeuristicPointsToSample() (``gpp_heuristic_expected_improvement_optimization.hpp``), this function
    makes no external assumptions about the underlying objective function. It relies instead on the GaussianProcess's
    ability to account for ongoing/incomplete experiments.

    If ``num_to_sample = 1``, this is the same as ComputeOptimalPointsToSampleWithRandomStarts().

    General q,p-EI via MC simulation can optionally be computed on a GPU. This requires the GPU components of MOE
    to be installed; otherwise a Runtime exception is thrown.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,p-EI)
    :type num_to_sample: int >= 1
    :param use_gpu: set to True if user wants to use GPU for MC simulation
    :type use_gpu: bool
    :param which_gpu: GPU device ID
    :type which_gpu: int >= 0
    :param randomness: RNGs used by C++ to generate initial guesses and as the source of normal random numbers when monte-carlo is used
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that maximize the expected improvement (solving the q,p-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    if randomness is None:
        # No RNGs supplied: build a container sized for max_num_threads and seed both
        # generators from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # C++ requires an initialized dict to write its status messages into.
        status = {}

    optimizer_parameters = ei_optimizer.optimizer_parameters
    objective = ei_optimizer.objective_function
    # Positional call into C++: the argument order below is significant.
    optimal_points = C_GP.multistart_expected_improvement_optimization(
        optimizer_parameters,
        objective._gaussian_process._gaussian_process,
        cpp_utils.cppify(ei_optimizer.domain.domain_bounds),
        cpp_utils.cppify(objective._points_being_sampled),
        num_to_sample,
        objective.num_being_sampled,
        objective._best_so_far,
        objective._num_mc_iterations,
        max_num_threads,
        use_gpu,
        which_gpu,
        randomness,
        status,
    )

    # Return the result as num_to_sample points, each with objective.dim coordinates.
    output_shape = (num_to_sample, ei_optimizer.objective_function.dim)
    return cpp_utils.uncppify(optimal_points, output_shape)
def _heuristic_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        estimation_policy,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Approximate the q,0-EI solution (cf. multistart_expected_improvement_optimization()) with a chain of 1,0-EI solves.

    Use this when multistart_expected_improvement_optimization() is too expensive. Because only 1,0-EI is ever
    solved, the analytic case is always hit; this makes it much faster than q,0-EI, which requires monte-carlo.
    Most users will go through one of the wrappers (e.g., constant_liar_expected_improvement_optimization() or
    kriging_believer_expected_improvement_optimization()) rather than calling this directly.

    Calls into heuristic_expected_improvement_optimization_wrapper in cpp/GPP_python_expected_improvement.cpp.

    .. NOTE:: The following comments are adapted from gpp_heuristic_expected_improvement_optimization.hpp, ComputeHeuristicPointsToSample().

    The q,0-EI optimization problem asks for the set of q points that maximizes the Expected Improvement (saved in
    the output, ``best_points_to_sample``). Solving for all q points simultaneously usually requires monte-carlo
    iteration and is expensive. The heuristic instead treats q-EI as a sequence of 1-EI problems: solve 1-EI, then
    *ASSUME* an objective function value at the resulting optimum, and repeat q times. In pseudocode::

      points_being_sampled = {}  // This stays empty! We are only working with 1,0-EI solves
      for i = 0:num_to_sample-1 {
        // First, solve the 1,0-EI problem\*
        new_point = ComputeOptimalPointsToSampleWithRandomStarts(gaussian_process, points_being_sampled, other_parameters)
        // *Estimate* the objective function value at new_point
        new_function_value = ESTIMATED_OBJECTIVE_FUNCTION_VALUE(new_point, other_args)
        new_function_value_noise = ESTIMATED_NOISE_VARIANCE(new_point, other_args)
        // Write the estimated objective values to the GP as *truth*
        gaussian_process.AddPoint(new_point, new_function_value, new_function_value_noise)
        optimal_points_to_sample.append(new_point)
      }

    \*Recall: each call to ComputeOptimalPointsToSampleWithRandomStarts() (gpp_math.hpp) kicks off a round of MGD optimization of 1-EI.

    Ideally the estimated objective function value (and noise) would be measured from the real world (e.g., by
    running an experiment), in which case this algorithm would be optimal. In practice the estimate is probably
    not an accurate representation of the true objective.

    The "estimation_policy" input supplies the estimates. Passing a ConstantLiarEstimationPolicy or
    KrigingBelieverEstimationPolicy object produces the "Constant Liar" and "Kriging Believer" heuristics
    described in Ginsbourger 2008. The estimation_policy interface is generic, so other estimators may be used too.

    By contrast, ComputeOptimalPointsToSample() (gpp_math.hpp) solves for all outputs of the q,0-EI problem
    simultaneously instead of one point at a time. That is more accurate (b/c it does not attempt to estimate the
    behavior of the underlying objective function) but much more expensive (because it requires monte-carlo iteration).

    If ``num_to_sample = 1``, this is exactly the same as ComputeOptimalPointsToSampleWithRandomStarts(); i.e.,
    both methods solve the 1-EI optimization problem the same way.

    As a very (VERY!) rough guideline for sizing the domain and num_multistarts: the coordinates of the initial
    guesses should not differ from the coordinates of the optima by more than about 1 order of magnitude.
    In other words, be wary of sets of initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note that it may not be a
    local optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if any step fails to find improvement! In that case, the return should not be
           read and status will report false.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param estimation_policy: the policy to use to produce (heuristic) objective function estimates during q,0-EI optimization
    :type estimation_policy: subclass of ObjectiveEstimationPolicyInterface (C++ pure abstract class)
       e.g., C_GP.KrigingBelieverEstimationPolicy, C_GP.ConstantLiarEstimationPolicy
       See gpp_heuristic_expected_improvement_optimization.hpp
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    if randomness is None:
        # No RNGs supplied: build a container sized for max_num_threads and seed both
        # generators from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # C++ requires an initialized dict to write its status messages into.
        status = {}

    optimizer_parameters = ei_optimizer.optimizer_parameters
    objective = ei_optimizer.objective_function
    # Positional call into C++: the argument order below is significant.
    heuristic_points = C_GP.heuristic_expected_improvement_optimization(
        optimizer_parameters,
        objective._gaussian_process._gaussian_process,
        cpp_utils.cppify(ei_optimizer.domain._domain_bounds),
        estimation_policy,
        num_to_sample,
        objective._best_so_far,
        max_num_threads,
        randomness,
        status,
    )

    # Return the result as num_to_sample points, each with objective.dim coordinates.
    output_shape = (num_to_sample, ei_optimizer.objective_function.dim)
    return cpp_utils.uncppify(heuristic_points, output_shape)
Exemple #24
0
 def test_exception_thrown_from_cpp(self):
     """Check that constructing a C++ GaussianProcess from these inputs raises ``C_GP.BoundsException``."""
     # Inputs chosen so that the C++ constructor is expected to reject them.
     constructor_args = ([-1.0, [1.0]], [], [], [], 1, 0)
     with pytest.raises(C_GP.BoundsException):
         C_GP.GaussianProcess(*constructor_args)
Exemple #25
0
def multistart_expected_improvement_mcmc_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    """Pick the best set of q points to sample CONCURRENTLY, using the objective's GP-MCMC model.

    This wraps ``C_GP.multistart_expected_improvement_mcmc_optimization``. The model
    (``_gaussian_process_mcmc``), the points being sampled, the list of best-so-far values
    (``_best_so_far_list``) and the number of MC iterations are all read from ``ei_optimizer.objective_function``.
    No GPU options are forwarded by this wrapper.

    .. NOTE:: The following comments are adapted from gpp_math.hpp, ComputeOptimalPointsToSample(), and were
      written for
      :func:`moe.optimal_learning.python.python_version.expected_improvement.multistart_expected_improvement_optimization`.
      NOTE(review): they describe the plain (non-MCMC) q,p-EI optimizer -- confirm how much carries over here.

    This is the primary entry-point for EI optimization in the optimal_learning library. It aims for robustness
    by combining higher accuracy methods like gradient descent with fail-safes like random/grid search.

    The q,p-EI problem covers situations such as wanting to run 4 experiments at the same time, maximizing the EI
    across all 4 at once, while 2 other experiments are already ongoing (4,2-EI). Evaluating q,p-EI (and its
    gradient) for q > 1 or p > 1 requires monte-carlo iteration, so this method is usually very expensive.

    Unlike ComputeHeuristicPointsToSample() (``gpp_heuristic_expected_improvement_optimization.hpp``), this function
    makes no external assumptions about the underlying objective function. It relies instead on the GaussianProcess's
    ability to account for ongoing/incomplete experiments.

    If ``num_to_sample = 1``, this is the same as ComputeOptimalPointsToSampleWithRandomStarts().

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,p-EI)
    :type num_to_sample: int >= 1
    :param randomness: RNGs used by C++ to generate initial guesses and as the source of normal random numbers when monte-carlo is used
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) selected by the C++ optimizer
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    if randomness is None:
        # No RNGs supplied: build a container sized for max_num_threads and seed both
        # generators from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # C++ requires an initialized dict to write its status messages into.
        status = {}

    optimizer_parameters = ei_optimizer.optimizer_parameters
    objective = ei_optimizer.objective_function
    # Positional call into C++: the argument order below is significant.
    optimal_points = C_GP.multistart_expected_improvement_mcmc_optimization(
        optimizer_parameters,
        objective._gaussian_process_mcmc._gaussian_process_mcmc,
        cpp_utils.cppify(ei_optimizer.domain.domain_bounds),
        cpp_utils.cppify(objective._points_being_sampled),
        num_to_sample,
        objective.num_being_sampled,
        # The best-so-far values are kept as a list; convert to an array before cppify.
        cpp_utils.cppify(numpy.array(objective._best_so_far_list)),
        objective._num_mc_iterations,
        max_num_threads,
        randomness,
        status,
    )

    # Return the result as num_to_sample points, each with objective.dim coordinates.
    output_shape = (num_to_sample, ei_optimizer.objective_function.dim)
    return cpp_utils.uncppify(optimal_points, output_shape)
Exemple #26
0
def _heuristic_expected_improvement_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    estimation_policy,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    r"""Approximate the q,0-EI optimum with a chain of ``num_to_sample`` 1,0-EI solves.

    This is a cheaper alternative to multistart_expected_improvement_optimization(). Because every sub-problem is
    1,0-EI, the analytic case is always used, which makes this much faster than q,0-EI (which needs monte-carlo).
    Most users will reach this through a wrapper (e.g., constant_liar_expected_improvement_optimization() or
    kriging_believer_expected_improvement_optimization()) rather than calling it directly.

    The work is delegated to heuristic_expected_improvement_optimization_wrapper in cpp/GPP_python_expected_improvement.cpp.

    .. NOTE:: The description below is adapted from gpp_heuristic_expected_improvement_optimization.hpp, ComputeHeuristicPointsToSample().

    The q,0-EI problem asks for the set of q points that maximizes the Expected Improvement (returned here as the
    best points to sample). Solving for all q points at once usually requires monte-carlo iteration and is expensive.
    The heuristic instead treats q-EI as a sequence of 1-EI problems: solve 1-EI, *ASSUME* an objective function
    value at the resulting optimum, and repeat q times. In pseudocode::

      points_being_sampled = {}  // This stays empty! We are only working with 1,0-EI solves
      for i = 0:num_to_sample-1 {
        // First, solve the 1,0-EI problem (see the remark after this listing)
        new_point = ComputeOptimalPointsToSampleWithRandomStarts(gaussian_process, points_being_sampled, other_parameters)
        // *Estimate* the objective function value at new_point
        new_function_value = ESTIMATED_OBJECTIVE_FUNCTION_VALUE(new_point, other_args)
        new_function_value_noise = ESTIMATED_NOISE_VARIANCE(new_point, other_args)
        // Write the estimated objective values to the GP as *truth*
        gaussian_process.AddPoint(new_point, new_function_value, new_function_value_noise)
        optimal_points_to_sample.append(new_point)
      }

    Remark: each call to ComputeOptimalPointsToSampleWithRandomStarts() (gpp_math.hpp) kicks off a round of MGD
    optimization of 1-EI.

    Ideally the estimated objective value (and noise) would come from a real-world measurement (e.g., running an
    experiment), in which case this algorithm would be optimal. In practice the estimate is probably not an accurate
    representation of the true objective.

    Estimation is controlled by ``estimation_policy``. Supplying a ConstantLiarEstimationPolicy or a
    KrigingBelieverEstimationPolicy yields the "Constant Liar" and "Kriging Believer" heuristics described in
    Ginsbourger 2008. The estimation_policy interface is generic, so other estimators may be supplied as well.

    By contrast, ComputeOptimalPointsToSample() (gpp_math.hpp) solves for all outputs of the q,0-EI problem
    simultaneously rather than one point at a time. That approach is more accurate (it does not try to estimate the
    behavior of the underlying objective function) but much more expensive (it requires monte-carlo iteration).

    With ``num_to_sample = 1`` this is exactly ComputeOptimalPointsToSampleWithRandomStarts(); both solve the 1-EI
    optimization problem the same way.

    Currently, during optimization, we recommend that the coordinates of the initial guesses not differ from the
    coordinates of the optima by more than about 1 order of magnitude. This is a very (VERY!) rough guideline for
    sizing the domain and num_multistarts; i.e., be wary of sets of initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; it may nevertheless fail to be a
    local optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if any step fails to find improvement! In that case, the return should not be
           read and status will report false.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param estimation_policy: the policy to use to produce (heuristic) objective function estimates during q,0-EI optimization
    :type estimation_policy: subclass of ObjectiveEstimationPolicyInterface (C++ pure abstract class)
       e.g., C_GP.KrigingBelieverEstimationPolicy, C_GP.ConstantLiarEstimationPolicy
       See gpp_heuristic_expected_improvement_optimization.hpp
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    if randomness is None:
        # No RNGs were supplied: build a container sized by max_num_threads and
        # seed it from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # The C++ call requires an already-initialized dict for its status output.
        status = {}

    # Collect the C++ inputs up front, in the same order they are passed below.
    optimizer_parameters = ei_optimizer.optimizer_parameters
    objective = ei_optimizer.objective_function
    cpp_gaussian_process = objective._gaussian_process._gaussian_process
    cpp_domain_bounds = cpp_utils.cppify(ei_optimizer.domain._domain_bounds)

    cpp_result = C_GP.heuristic_expected_improvement_optimization(
        optimizer_parameters,
        cpp_gaussian_process,
        cpp_domain_bounds,
        estimation_policy,
        num_to_sample,
        objective._best_so_far,
        max_num_threads,
        randomness,
        status,
    )

    # Convert the C++ output back into num_to_sample points, each of the objective function's dimension.
    output_shape = (num_to_sample, objective.dim)
    return cpp_utils.uncppify(cpp_result, output_shape)
Exemple #27
0
def multistart_hyperparameter_optimization(
        log_likelihood_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Find the hyperparameters that maximize a log likelihood measure of model fit (over the historical data) within a domain.

    .. Note:: The following comments are copied to
      :mod:`moe.optimal_learning.python.python_version.log_likelihood.multistart_hyperparameter_optimization`.

    For an overview of some example log likelihood-like measures, see
    :class:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.GaussianProcessLogMarginalLikelihood` and
    :class:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`.

    Available optimizers: null ('dumb' search), gradient descent, newton. Newton is the suggested optimizer.

    A 'dumb' search simply evaluates the objective log likelihood measure at num_multistarts 'points'
    (hyperparameters) in the domain, uniformly sampled using latin hypercube sampling.
    The optimizer parameters carried by ``log_likelihood_optimizer`` select the optimization technique and the
    parameters controlling its behavior (see :mod:`moe.optimal_learning.python.cpp_wrappers.optimization`).

    See gpp_python_common.cpp for C++ enum declarations laying out the options for objective and optimizer types.

    Currently, during optimization, we recommend that the coordinates of the initial guesses not differ from the
    coordinates of the optima by more than about 1 order of magnitude. This is a very (VERY!) rough guideline for
    sizing the domain and gd_parameters.num_multistarts; i.e., be wary of sets of initial guesses that cover the space too sparsely.

    C++ expects the domain in log-10 space: this wrapper applies ``numpy.log10`` to the bounds held by
    ``log_likelihood_optimizer.domain`` before handing them to C++.

    The solution is guaranteed to lie within the region specified by "domain"; it may nevertheless fail to be a
    true optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found!  In that case,
       the output will always be the first randomly chosen point. status will report failure.

    :param log_likelihood_optimizer: object that optimizes (e.g., gradient descent, newton) log likelihood over a domain
    :type log_likelihood_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``log_likelihood_optimizer`` (UNUSED, data is in log_likelihood_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_optimizer.objective_function.num_hyperparameters)

    """
    if randomness is None:
        # No RNGs were supplied: build a container sized by max_num_threads and
        # seed it from less repeatable factors (e.g., time).
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    if status is None:
        # The C++ call requires an already-initialized dict for its status output.
        status = {}

    # The C++ side wants the domain bounds in log10 space (and in list form, handled by cppify below).
    log10_bounds = numpy.log10(log_likelihood_optimizer.domain._domain_bounds)

    # Collect the C++ inputs up front, in the same order they are passed below.
    optimizer_parameters = log_likelihood_optimizer.optimizer_parameters
    cpp_domain_bounds = cpp_utils.cppify(log10_bounds)
    log_likelihood = log_likelihood_optimizer.objective_function
    cpp_points_sampled = cpp_utils.cppify(log_likelihood._points_sampled)
    cpp_points_sampled_value = cpp_utils.cppify(log_likelihood._points_sampled_value)
    dim = log_likelihood.dim
    num_sampled = log_likelihood._num_sampled
    cpp_hyperparameters = cpp_utils.cppify_hyperparameters(log_likelihood.hyperparameters)
    cpp_noise_variance = cpp_utils.cppify(log_likelihood._points_sampled_noise_variance)

    optimal_hyperparameters = C_GP.multistart_hyperparameter_optimization(
        optimizer_parameters,
        cpp_domain_bounds,
        cpp_points_sampled,
        cpp_points_sampled_value,
        dim,
        num_sampled,
        cpp_hyperparameters,
        cpp_noise_variance,
        max_num_threads,
        randomness,
        status,
    )
    return numpy.array(optimal_hyperparameters)
Exemple #28
0
 def test_run_cpp_unit_tests(self):
     """Run the C++ unit tests through ``C_GP.run_cpp_tests`` and require that the reported error count is 0."""
     cpp_error_count = C_GP.run_cpp_tests()
     assert cpp_error_count == 0