Example #1
0
    def compute_grad_variance_of_points(self, points_to_sample, num_derivatives=-1):
        r"""Return the gradient of this GP's variance matrix at ``points_to_sample`` (``Xs``) wrt ``Xs``.

        This is a thin wrapper: the computation is delegated to ``self._gaussian_process``; this method
        only converts the input with ``cpp_utils.cppify`` and passes the result through
        ``cpp_utils.uncppify`` together with the expected output shape.

        In contrast to compute_grad_cholesky_variance_of_points(), the result does not contain the
        gradient terms that come from the cholesky factorization.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_var: gradient of the variance matrix of this GP
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """
        num_points = points_to_sample.shape[0]
        # The clamped count is used both for the backend call and for the output shape.
        derivative_count = self._clamp_num_derivatives(num_points, num_derivatives)

        raw_grad_variance = self._gaussian_process.compute_grad_variance_of_points(
            cpp_utils.cppify(points_to_sample),
            num_points,
            derivative_count,
        )
        output_shape = (derivative_count, num_points, num_points, self.dim)
        return cpp_utils.uncppify(raw_grad_variance, output_shape)
Example #2
0
    def compute_grad_posterior_mean(self, force_monte_carlo=False):
        r"""Evaluate ``C_GP.compute_grad_posterior_mean`` at this object's ``points_to_sample``.

        The backend call receives, in order: the wrapped gaussian process
        (``self._gaussian_process._gaussian_process``), ``self._num_fidelity``, and the cppified
        ``self._points_to_sample``. Going by the backend function's name, the result is presumably the
        gradient of the GP posterior mean wrt ``points_to_sample`` -- TODO confirm against the C++ wrapper.

        No ``points_being_sampled``, monte-carlo iteration count, or randomness source is handed to the
        backend by this method.

        :param force_monte_carlo: not read by this method (presumably kept so the signature matches
          ``compute_grad_expected_improvement`` -- verify against callers)
        :type force_monte_carlo: boolean
        :return: the backend result passed through ``cpp_utils.uncppify``
        :rtype: array with shape (1, dim - num_fidelity)

        """
        wrapped_gp = self._gaussian_process._gaussian_process
        fidelity_count = self._num_fidelity
        cpp_points = cpp_utils.cppify(self._points_to_sample)

        raw_gradient = C_GP.compute_grad_posterior_mean(wrapped_gp, fidelity_count, cpp_points)

        # A single row whose length is the full dimension minus the number of fidelity dimensions.
        output_shape = (1, self.dim - fidelity_count)
        return cpp_utils.uncppify(raw_gradient, output_shape)
Example #3
0
    def compute_grad_cholesky_variance_of_points(self, points_to_sample, num_derivatives=-1):
        r"""Return the gradient of the cholesky factor of this GP's variance matrix at ``points_to_sample`` (``Xs``) wrt ``Xs``.

        The result accounts for the effect that cholesky-factoring the variance matrix has on the
        gradient; see Smith 1995 for the algorithm.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        ``grad_chol`` is nominally sized ``grad_chol[num_to_sample][num_to_sample][num_to_sample][dim]``.
        With the indexing ``grad_chol[k][j][i][d]``, an entry is read as the derivative of ``var[j][i]``
        with respect to ``x_{k,d}`` (x = ``points_to_sample``).

        This is a thin wrapper: the computation is delegated to ``self._gaussian_process``.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_cholesky_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_chol: gradient of the cholesky factorization of the variance matrix of this GP.
          ``grad_chol[k][j][i][d]`` is actually the gradients of ``var_{j,i}`` with
          respect to ``x_{k,d}``, the d-th dimension of the k-th entry of ``points_to_sample``
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """
        num_points = points_to_sample.shape[0]
        # The clamped count is used both for the backend call and for the output shape.
        derivative_count = self._clamp_num_derivatives(num_points, num_derivatives)

        raw_grad_chol = self._gaussian_process.compute_grad_cholesky_variance_of_points(
            cpp_utils.cppify(points_to_sample),
            num_points,
            derivative_count,
        )
        output_shape = (derivative_count, num_points, num_points, self.dim)
        return cpp_utils.uncppify(raw_grad_chol, output_shape)
Example #4
0
    def compute_grad_variance_of_points(self,
                                        points_to_sample,
                                        num_derivatives=-1):
        r"""Return the gradient of this GP's variance matrix at ``points_to_sample`` (``Xs``) wrt ``Xs``.

        The heavy lifting is done by ``self._gaussian_process``; here the input is converted with
        ``cpp_utils.cppify`` and the result is passed through ``cpp_utils.uncppify`` with the expected shape.

        Compared with compute_grad_cholesky_variance_of_points(), gradient terms from the cholesky
        factorization are not included.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_var: gradient of the variance matrix of this GP
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """
        sample_count = points_to_sample.shape[0]
        # Clamp once; the same value goes to the backend and into the output shape.
        clamped_derivatives = self._clamp_num_derivatives(sample_count, num_derivatives)

        raw_result = self._gaussian_process.compute_grad_variance_of_points(
            cpp_utils.cppify(points_to_sample), sample_count, clamped_derivatives)

        result_shape = (clamped_derivatives, sample_count, sample_count, self.dim)
        return cpp_utils.uncppify(raw_result, result_shape)
Example #5
0
    def compute_grad_mean_of_points(self, points_to_sample, num_derivatives=-1):
        r"""Return the gradient of this GP's mean at each point of ``points_to_sample`` (``Xs``) wrt ``Xs``.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        ``grad_mu`` is nominally sized ``grad_mu[num_to_sample][num_to_sample][dim]``: the d-th component
        of the derivative evaluated at the i-th input wrt the j-th input. However, for
        ``0 <= i,j < num_to_sample``, ``i != j``, ``grad_mu[j][i][d] = 0``
        (see references or implementation for further details), so ``grad_mu`` is stored in a reduced
        form that only tracks the nonzero entries.

        Only the first ``num_derivatives`` points (after clamping) are forwarded to
        ``self._gaussian_process``, and the output has one row per forwarded point.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_mean_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_mu: gradient of the mean of the GP. ``grad_mu[i][d]`` is actually the gradient
          of ``\mu_i`` wrt ``x_{i,d}``, the d-th dim of the i-th entry of ``points_to_sample``.
        :rtype: array of float64 with shape (num_derivatives, dim), with num_derivatives as clamped

        """
        derivative_count = self._clamp_num_derivatives(points_to_sample.shape[0], num_derivatives)

        # Differentiate only the leading points, so only those are sent to the backend.
        leading_points = points_to_sample[:derivative_count, ...]
        raw_grad_mu = self._gaussian_process.compute_grad_mean_of_points(
            cpp_utils.cppify(leading_points),
            derivative_count,
        )

        output_shape = (derivative_count, self.dim)
        return cpp_utils.uncppify(raw_grad_mu, output_shape)
Example #6
0
    def compute_grad_mean_of_points(self,
                                    points_to_sample,
                                    num_derivatives=-1):
        r"""Return the gradient of this GP's mean at each point of ``points_to_sample`` (``Xs``) wrt ``Xs``.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        Nominally ``grad_mu`` has size ``grad_mu[num_to_sample][num_to_sample][dim]`` (the d-th component
        of the derivative evaluated at the i-th input wrt the j-th input). Because
        ``grad_mu[j][i][d] = 0`` whenever ``i != j`` (``0 <= i,j < num_to_sample``; see references or
        implementation for further details), ``grad_mu`` is kept in a reduced form holding only the
        nonzero entries.

        After clamping ``num_derivatives``, only that many leading points are passed on to
        ``self._gaussian_process``; the output has one row per point passed on.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_mean_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_mu: gradient of the mean of the GP. ``grad_mu[i][d]`` is actually the gradient
          of ``\mu_i`` wrt ``x_{i,d}``, the d-th dim of the i-th entry of ``points_to_sample``.
        :rtype: array of float64 with shape (num_derivatives, dim), with num_derivatives as clamped

        """
        total_points = points_to_sample.shape[0]
        clamped_derivatives = self._clamp_num_derivatives(total_points, num_derivatives)

        # Slice off the points we are differentiating with respect to before converting.
        points_subset = points_to_sample[:clamped_derivatives, ...]
        raw_result = self._gaussian_process.compute_grad_mean_of_points(
            cpp_utils.cppify(points_subset), clamped_derivatives)

        return cpp_utils.uncppify(raw_result, (clamped_derivatives, self.dim))
Example #7
0
    def compute_cholesky_variance_of_points(self, points_to_sample):
        r"""Return the cholesky factor of this GP's variance matrix at ``points_to_sample`` (``Xs``).

        The factorization itself is performed by ``self._gaussian_process``; this method converts the
        input with ``cpp_utils.cppify`` and passes the result through ``cpp_utils.uncppify`` with a
        square ``(num_to_sample, num_to_sample)`` shape.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: cholesky factorization of the variance matrix of this GP, lower triangular
        :rtype: array of float64 with shape (num_to_sample, num_to_sample), only lower triangle filled in

        """
        num_points = points_to_sample.shape[0]
        cpp_points = cpp_utils.cppify(points_to_sample)
        raw_cholesky = self._gaussian_process.compute_cholesky_variance_of_points(cpp_points, num_points)

        square_shape = (num_points, num_points)
        return cpp_utils.uncppify(raw_cholesky, square_shape)
Example #8
0
    def compute_cholesky_variance_of_points(self, points_to_sample):
        r"""Return the cholesky factor of this GP's variance matrix at ``points_to_sample`` (``Xs``).

        Delegates to ``self._gaussian_process`` and passes the result through ``cpp_utils.uncppify``
        with shape ``(num_to_sample, num_to_sample)``, where ``num_to_sample`` is the number of rows of
        ``points_to_sample``.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: cholesky factorization of the variance matrix of this GP, lower triangular
        :rtype: array of float64 with shape (num_to_sample, num_to_sample), only lower triangle filled in

        """
        sample_count = points_to_sample.shape[0]
        result_shape = (sample_count, sample_count)

        raw_factor = self._gaussian_process.compute_cholesky_variance_of_points(
            cpp_utils.cppify(points_to_sample), sample_count)
        return cpp_utils.uncppify(raw_factor, result_shape)
Example #9
0
    def compute_grad_cholesky_variance_of_points(self,
                                                 points_to_sample,
                                                 num_derivatives=-1):
        r"""Return the gradient of the cholesky factor of this GP's variance matrix at ``points_to_sample`` (``Xs``) wrt ``Xs``.

        The effect of cholesky-factoring the variance matrix on the gradient is included; see
        Smith 1995 for algorithm details.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        Nominal size of the result: ``grad_chol[num_to_sample][num_to_sample][num_to_sample][dim]``.
        Indexed as ``grad_chol[k][j][i][d]``, an entry is read as the derivative of ``var[j][i]``
        with respect to ``x_{k,d}`` (x = ``points_to_sample``).

        The computation is delegated to ``self._gaussian_process``.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_grad_cholesky_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_chol: gradient of the cholesky factorization of the variance matrix of this GP.
          ``grad_chol[k][j][i][d]`` is actually the gradients of ``var_{j,i}`` with
          respect to ``x_{k,d}``, the d-th dimension of the k-th entry of ``points_to_sample``
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """
        sample_count = points_to_sample.shape[0]
        # Clamp once; the same value goes to the backend and into the output shape.
        clamped_derivatives = self._clamp_num_derivatives(sample_count, num_derivatives)

        raw_result = self._gaussian_process.compute_grad_cholesky_variance_of_points(
            cpp_utils.cppify(points_to_sample), sample_count, clamped_derivatives)

        result_shape = (clamped_derivatives, sample_count, sample_count, self.dim)
        return cpp_utils.uncppify(raw_result, result_shape)
Example #10
0
    def compute_variance_of_points(self, points_to_sample):
        r"""Return the variance (matrix) of this GP at each point of ``points_to_sample`` (``Xs``).

        The variance matrix is symmetric, but the full (not triangular) representation is returned.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        The computation is delegated to ``self._gaussian_process``; the result is passed through
        ``cpp_utils.uncppify`` with a square ``(num_to_sample, num_to_sample)`` shape.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: var_star: variance matrix of this GP
        :rtype: array of float64 with shape (num_to_sample, num_to_sample)

        """
        num_points = points_to_sample.shape[0]
        cpp_points = cpp_utils.cppify(points_to_sample)
        raw_variance = self._gaussian_process.compute_variance_of_points(cpp_points, num_points)

        square_shape = (num_points, num_points)
        return cpp_utils.uncppify(raw_variance, square_shape)
Example #11
0
    def compute_grad_expected_improvement(self, force_monte_carlo=False):
        r"""Compute the gradient of expected improvement at ``points_to_sample`` wrt ``points_to_sample``, with ``points_being_sampled`` concurrent samples.

        .. Note:: The description below follows
          :meth:`moe.optimal_learning.python.interfaces.expected_improvement_interface.ExpectedImprovementInterface.compute_grad_expected_improvement`

        In q,p-EI terms, ``points_to_sample`` is the "q" and ``points_being_sampled`` is the "p".

        Gradients of EI are in general complex and difficult to evaluate, so Monte-Carlo simulation is
        used to approximate them; faster (e.g., analytic) techniques are preferred when available.

        The MC computation of grad EI mirrors the computation of EI (described in
        compute_expected_improvement). ``y = \mu + Lw`` is differentiated wrt ``points_to_sample``;
        only terms from the gradient of ``\mu`` and ``L`` contribute. For EI the quantity computed is
        ``improvement_per_step = max(max(best_so_far - y), 0.0)``,
        so only the smallest component of ``y`` can contribute (and only when the improvement is > 0.0).
        Calling that index ``winner``, grad EI only accumulates the gradient terms attributable to the
        ``winner``-th component of ``y``.

        The evaluation itself is performed by ``C_GP.compute_grad_expected_improvement`` using this
        object's stored points, iteration count, best-so-far value and randomness source.

        :param force_monte_carlo: whether to force monte carlo evaluation (vs using fast/accurate analytic eval when possible)
        :type force_monte_carlo: boolean
        :return: gradient of EI, ``\pderiv{EI(Xq \cup Xp)}{Xq_{i,d}}`` where ``Xq`` is ``points_to_sample``
          and ``Xp`` is ``points_being_sampled`` (grad EI from sampling ``points_to_sample`` with
          ``points_being_sampled`` concurrent experiments wrt each dimension of the points in ``points_to_sample``)
        :rtype: array of float64 with shape (num_to_sample, dim)

        """
        # Gather the arguments in the positional order the backend call expects.
        wrapped_gp = self._gaussian_process._gaussian_process
        cpp_points_to_sample = cpp_utils.cppify(self._points_to_sample)
        cpp_points_being_sampled = cpp_utils.cppify(self._points_being_sampled)

        raw_grad_ei = C_GP.compute_grad_expected_improvement(
            wrapped_gp,
            cpp_points_to_sample,
            cpp_points_being_sampled,
            self.num_to_sample,
            self.num_being_sampled,
            self._num_mc_iterations,
            self._best_so_far,
            force_monte_carlo,
            self._randomness,
        )

        output_shape = (self.num_to_sample, self.dim)
        return cpp_utils.uncppify(raw_grad_ei, output_shape)
Example #12
0
    def compute_variance_of_points(self, points_to_sample):
        r"""Return the variance (matrix) of this GP at each point of ``points_to_sample`` (``Xs``).

        Although the variance matrix is symmetric, the full representation is currently returned.

        ``points_to_sample`` must not contain duplicate points; duplicates produce singular covariance matrices.

        Delegates to ``self._gaussian_process`` and passes the result through ``cpp_utils.uncppify``
        with shape ``(num_to_sample, num_to_sample)``, where ``num_to_sample`` is the number of rows of
        ``points_to_sample``.

        .. Note:: The contract is the one described in
          :mod:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: var_star: variance matrix of this GP
        :rtype: array of float64 with shape (num_to_sample, num_to_sample)

        """
        sample_count = points_to_sample.shape[0]
        result_shape = (sample_count, sample_count)

        raw_matrix = self._gaussian_process.compute_variance_of_points(
            cpp_utils.cppify(points_to_sample), sample_count)
        return cpp_utils.uncppify(raw_matrix, result_shape)
Example #13
0
def multistart_expected_improvement_mcmc_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    """Run the C++ multistart MCMC expected-improvement optimization and return the q points to sample CONCURRENTLY.

    This wraps ``C_GP.multistart_expected_improvement_mcmc_optimization``. Everything the backend needs
    besides the explicit arguments is read from ``ei_optimizer``: its ``optimizer_parameters``, its
    ``domain.domain_bounds``, and, from its ``objective_function``, the wrapped MCMC gaussian process
    (``_gaussian_process_mcmc._gaussian_process_mcmc``), ``_points_being_sampled``, ``num_being_sampled``,
    ``_best_so_far_list`` and ``_num_mc_iterations``.

    .. NOTE:: The background below is adapted from the comments of gpp_math.hpp, ComputeOptimalPointsToSample(),
      which are also used by
      :func:`moe.optimal_learning.python.python_version.expected_improvement.multistart_expected_improvement_optimization`

    The q,p problem asks for the optimal set of q points to sample concurrently while p other
    experiments are already in progress. For example, we may want to run 4 experiments at the same
    time and maximize the objective across all 4 at once while knowing of 2 ongoing experiments (4,2).
    Evaluating q,p-EI (and its gradient) for q > 1 or p > 1 requires monte-carlo iteration, so this
    method is usually very expensive.

    The approach combines higher accuracy methods like gradient descent with fail-safes like
    random/grid search to improve robustness.

    Compared to ComputeHeuristicPointsToSample() (``gpp_heuristic_expected_improvement_optimization.hpp``),
    no external assumptions about the underlying objective function are made; instead a feature of
    the GaussianProcess is used that lets the GP account for ongoing/incomplete experiments.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in the q,p problem)
    :type num_to_sample: int >= 1
    :param randomness: RNGs used by C++ to generate initial guesses and as the source of normal random numbers when monte-carlo is used;
      when None, a new container is created for ``max_num_threads`` threads and seeded here
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.);
      when None, a fresh dict is used and is not visible to the caller
    :type status: dict
    :return: point(s) chosen by the C++ optimization
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    # The C++ call requires an initialized dict to write its status messages into.
    if status is None:
        status = {}

    # Without a caller-supplied source, build one sized for the thread count and
    # seed it from less repeatable factors (e.g., time).
    if randomness is None:
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    objective = ei_optimizer.objective_function

    raw_best_points = C_GP.multistart_expected_improvement_mcmc_optimization(
        ei_optimizer.optimizer_parameters,
        objective._gaussian_process_mcmc._gaussian_process_mcmc,
        cpp_utils.cppify(ei_optimizer.domain.domain_bounds),
        cpp_utils.cppify(objective._points_being_sampled),
        num_to_sample,
        objective.num_being_sampled,
        cpp_utils.cppify(numpy.array(objective._best_so_far_list)),
        objective._num_mc_iterations,
        max_num_threads,
        randomness,
        status,
    )

    # Present the output as num_to_sample points of dimension objective.dim.
    output_shape = (num_to_sample, objective.dim)
    return cpp_utils.uncppify(raw_best_points, output_shape)
Example #14
0
def _heuristic_expected_improvement_optimization(
    ei_optimizer,
    num_multistarts,
    num_to_sample,
    estimation_policy,
    randomness=None,
    max_num_threads=DEFAULT_MAX_NUM_THREADS,
    status=None,
):
    r"""Heuristically solve the q,0-EI problem (estimating multistart_expected_improvement_optimization()) using 1,0-EI solves.

    This is an alternative for when multistart_expected_improvement_optimization() is too expensive.
    Because it only ever performs 1,0-EI solves, it always hits the analytic case and is therefore
    much faster than q,0-EI, which requires monte-carlo. Users will probably call one of this
    function's wrappers (e.g., constant_liar_expected_improvement_optimization() or
    kriging_believer_expected_improvement_optimization()) rather than this function directly.

    Calls into heuristic_expected_improvement_optimization_wrapper in cpp/GPP_python_expected_improvement.cpp.

    .. NOTE:: The following is adapted from the comments of gpp_heuristic_expected_improvement_optimization.hpp,
      ComputeHeuristicPointsToSample().

    The q,0-EI optimization problem is to find the set of q points that maximizes the Expected
    Improvement (saved in the output, ``best_points_to_sample``). Solving for q points simultaneously
    usually requires monte-carlo iteration and is expensive. The heuristic instead treats q-EI as a
    sequence of 1-EI problems: solve 1-EI, then *ASSUME* an objective function value at the resulting
    optima, and repeat q times. In pseudocode::

      points_being_sampled = {}  // This stays empty! We are only working with 1,0-EI solves
      for i = 0:num_to_sample-1 {
        // First, solve the 1,0-EI problem\*
        new_point = ComputeOptimalPointsToSampleWithRandomStarts(gaussian_process, points_being_sampled, other_parameters)
        // *Estimate* the objective function value at new_point
        new_function_value = ESTIMATED_OBJECTIVE_FUNCTION_VALUE(new_point, other_args)
        new_function_value_noise = ESTIMATED_NOISE_VARIANCE(new_point, other_args)
        // Write the estimated objective values to the GP as *truth*
        gaussian_process.AddPoint(new_point, new_function_value, new_function_value_noise)
        optimal_points_to_sample.append(new_point)
      }

    \*Recall: each call to ComputeOptimalPointsToSampleWithRandomStarts() (gpp_math.hpp) kicks off a round of MGD optimization of 1-EI.

    Ideally the estimated objective function value (and noise) would be measured from the real world
    (e.g., by running an experiment), in which case this algorithm would be optimal. The estimate,
    however, is probably not an accurate representation of the true objective.

    Estimation is controlled by the ``estimation_policy`` input. Passing a ConstantLiarEstimationPolicy
    or KrigingBelieverEstimationPolicy object produces the "Constant Liar" and "Kriging Believer"
    heuristics described in Ginsbourger 2008. The interface for estimation_policy is generic, so
    users may specify other estimators as well.

    Contrast this with ComputeOptimalPointsToSample() (gpp_math.hpp), which solves for all outputs of
    the q,0-EI problem simultaneously instead of one point at a time. That method is more accurate
    (it does not attempt to estimate the behavior of the underlying objective function) but much
    more expensive (it requires monte-carlo iteration).

    If ``num_to_sample = 1``, this is exactly the same as ComputeOptimalPointsToSampleWithRandomStarts(); i.e.,
    both methods solve the 1-EI optimization problem the same way.

    Currently, during optimization, it is recommended that the coordinates of the initial guesses not
    differ from the coordinates of the optima by more than about 1 order of magnitude. This is a very
    (VERY!) rough guideline for sizing the domain and num_multistarts; i.e., be wary of sets of
    initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note that it may not
    be a local optima (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if any step fails to find improvement! In that case, the return should not be
           read and status will report false.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param estimation_policy: the policy to use to produce (heuristic) objective function estimates during q,0-EI optimization
    :type estimation_policy: subclass of ObjectiveEstimationPolicyInterface (C++ pure abstract class)
       e.g., C_GP.KrigingBelieverEstimationPolicy, C_GP.ConstantLiarEstimationPolicy
       See gpp_heuristic_expected_improvement_optimization.hpp
    :param randomness: RNGs used by C++ to generate initial guesses; when None, a new container is
      created for ``max_num_threads`` threads and seeded here
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.);
      when None, a fresh dict is used and is not visible to the caller
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    # The C++ call requires an initialized dict to write its status messages into.
    if status is None:
        status = {}

    # Without a caller-supplied source, build one sized for the thread count and
    # seed it from less repeatable factors (e.g., time).
    if randomness is None:
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    objective = ei_optimizer.objective_function

    raw_best_points = C_GP.heuristic_expected_improvement_optimization(
        ei_optimizer.optimizer_parameters,
        objective._gaussian_process._gaussian_process,
        cpp_utils.cppify(ei_optimizer.domain._domain_bounds),
        estimation_policy,
        num_to_sample,
        objective._best_so_far,
        max_num_threads,
        randomness,
        status,
    )

    # Present the output as num_to_sample points of dimension objective.dim.
    output_shape = (num_to_sample, objective.dim)
    return cpp_utils.uncppify(raw_best_points, output_shape)
Example #15
0
def multistart_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        use_gpu=False,
        which_gpu=0,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Solve the q,p-EI problem: find the best set of q points to sample CONCURRENTLY in future experiments.

    If ``points_being_sampled.size == 0 && num_to_sample == 1``, the (fast) analytic EI computation is used.

    .. NOTE:: The description below is copied from gpp_math.hpp, ComputeOptimalPointsToSample().
      The same comments also appear in
      :func:`moe.optimal_learning.python.python_version.expected_improvement.multistart_expected_improvement_optimization`

    This is the main entry-point for EI optimization in the optimal_learning library. It is our best attempt at
    robustness: higher accuracy methods (e.g., gradient descent) are combined with fail-safes (e.g., random/grid search).

    The result is the optimal set of q points to sample CONCURRENTLY, obtained by solving the q,p-EI problem. For example,
    we may want to launch 4 experiments at once, maximizing EI over all 4 jointly, while 2 other experiments are still
    running (4,2-EI); this function covers that situation. Evaluating q,p-EI (and its gradient) for q > 1 or p > 1
    requires monte-carlo iteration, so this method is usually very expensive.

    Unlike ComputeHeuristicPointsToSample() (``gpp_heuristic_expected_improvement_optimization.hpp``), no external
    assumptions are made about the underlying objective function. Instead, this relies on the GaussianProcess's ability
    to account for ongoing/incomplete experiments.

    With ``num_to_sample = 1``, this is equivalent to ComputeOptimalPointsToSampleWithRandomStarts().

    General q,p-EI can optionally be computed on a GPU via MC simulation. This requires the GPU components of MOE to be
    installed; otherwise, a Runtime exception is thrown.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,p-EI)
    :type num_to_sample: int >= 1
    :param use_gpu: set to True if user wants to use GPU for MC simulation
    :type use_gpu: bool
    :param which_gpu: GPU device ID
    :type which_gpu: int >= 0
    :param randomness: RNGs used by C++ to generate initial guesses and as the source of normal random numbers when monte-carlo is used
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that maximize the expected improvement (solving the q,p-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    # The C++ call needs an already-initialized dict to write its status messages into.
    if status is None:
        status = {}

    # No RNGs supplied: build a container sized by max_num_threads and seed
    # both generators from less repeatable factors (e.g., time).
    if randomness is None:
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    # Gather the C++-side inputs in the positional order the wrapped function takes them.
    optimizer_parameters = ei_optimizer.optimizer_parameters
    ei_evaluator = ei_optimizer.objective_function
    cpp_gaussian_process = ei_evaluator._gaussian_process._gaussian_process
    cpp_domain_bounds = cpp_utils.cppify(ei_optimizer.domain.domain_bounds)
    cpp_points_being_sampled = cpp_utils.cppify(ei_evaluator._points_being_sampled)

    flat_optimal_points = C_GP.multistart_expected_improvement_optimization(
        optimizer_parameters,
        cpp_gaussian_process,
        cpp_domain_bounds,
        cpp_points_being_sampled,
        num_to_sample,
        ei_evaluator.num_being_sampled,
        ei_evaluator._best_so_far,
        ei_evaluator._num_mc_iterations,
        max_num_threads,
        use_gpu,
        which_gpu,
        randomness,
        status,
    )

    # Reshape the C++ output into num_to_sample points, each with
    # ei_optimizer.objective_function.dim coordinates.
    output_shape = (num_to_sample, ei_optimizer.objective_function.dim)
    return cpp_utils.uncppify(flat_optimal_points, output_shape)
Example #16
0
def _heuristic_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        estimation_policy,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Approximate the q,0-EI solution (i.e., estimate multistart_expected_improvement_optimization()) via repeated 1,0-EI solves.

    Use this when multistart_expected_improvement_optimization() is too expensive. Every solve performed here is a
    1,0-EI solve, which always hits the analytic case; hence this is much faster than q,0-EI, which requires monte-carlo.
    Most users will call one of this function's wrappers (e.g., constant_liar_expected_improvement_optimization() or
    kriging_believer_expected_improvement_optimization()) rather than this function itself.

    Calls into heuristic_expected_improvement_optimization_wrapper in cpp/GPP_python_expected_improvement.cpp.

    .. NOTE:: The description below is copied from gpp_heuristic_expected_improvement_optimization.hpp, ComputeHeuristicPointsToSample().

    This heuristically solves the q,0-EI optimization problem; that is, the problem of finding the set of q points
    that maximizes the Expected Improvement (saved in the output, ``best_points_to_sample``). Solving for all q points
    at once usually requires monte-carlo iteration and is expensive. The heuristic instead treats q-EI as a sequence of
    1-EI problems: solve 1-EI, *ASSUME* an objective function value at the resulting optima, and repeat q times.
    In pseudocode::

      points_being_sampled = {}  // This stays empty! We are only working with 1,0-EI solves
      for i = 0:num_to_sample-1 {
        // First, solve the 1,0-EI problem\*
        new_point = ComputeOptimalPointsToSampleWithRandomStarts(gaussian_process, points_being_sampled, other_parameters)
        // *Estimate* the objective function value at new_point
        new_function_value = ESTIMATED_OBJECTIVE_FUNCTION_VALUE(new_point, other_args)
        new_function_value_noise = ESTIMATED_NOISE_VARIANCE(new_point, other_args)
        // Write the estimated objective values to the GP as *truth*
        gaussian_process.AddPoint(new_point, new_function_value, new_function_value_noise)
        optimal_points_to_sample.append(new_point)
      }

    \*Recall: each call to ComputeOptimalPointsToSampleWithRandomStarts() (gpp_math.hpp) kicks off a round of MGD optimization of 1-EI.

    Ideally the estimated objective function value (and noise) would instead be measured from the real-world (e.g.,
    by running an experiment), in which case this algorithm would be optimal. In practice, the estimate is probably
    not an accurate representation of the true objective.

    The "estimation_policy" input controls the estimation. Passing a ConstantLiarEstimationPolicy or
    KrigingBelieverEstimationPolicy object produces the "Constant Liar" and "Kriging Believer" heuristics
    described in Ginsbourger 2008. The estimation_policy interface is generic, so users may supply other
    estimators as well.

    By contrast, ComputeOptimalPointsToSample() (gpp_math.hpp) solves all outputs of the q,0-EI problem simultaneously
    rather than one point at a time. That method is more accurate (b/c it does not attempt to estimate the behavior of
    the underlying objective function) but much more expensive (because it requires monte-carlo iteration).

    With ``num_to_sample = 1``, this is exactly the same as ComputeOptimalPointsToSampleWithRandomStarts(); i.e.,
    both methods solve the 1-EI optimization problem the same way.

    Currently, during optimization, we recommend that the coordinates of the initial guesses not differ from the
    coordinates of the optima by more than about 1 order of magnitude. This is a very (VERY!) rough guideline for
    sizing the domain and num_multistarts; i.e., be wary of sets of initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note that it may not be a
    local optima (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if any step fails to find improvement! In that case, the return should not be
           read and status will report false.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: cpp_wrappers.optimization.*Optimizer object
    :param num_multistarts: number of times to multistart ``ei_optimizer`` (UNUSED, data is in ei_optimizer.optimizer_parameters)
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,0-EI)
    :type num_to_sample: int >= 1
    :param estimation_policy: the policy to use to produce (heuristic) objective function estimates during q,0-EI optimization
    :type estimation_policy: subclass of ObjectiveEstimationPolicyInterface (C++ pure abstract class)
       e.g., C_GP.KrigingBelieverEstimationPolicy, C_GP.ConstantLiarEstimationPolicy
       See gpp_heuristic_expected_improvement_optimization.hpp
    :param randomness: RNGs used by C++ to generate initial guesses
    :type randomness: RandomnessSourceContainer (C++ object; e.g., from C_GP.RandomnessSourceContainer())
    :param max_num_threads: maximum number of threads to use, >= 1
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that approximately maximize the expected improvement (solving the q,0-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_optimizer.objective_function.dim)

    """
    # The C++ call needs an already-initialized dict to write its status messages into.
    if status is None:
        status = {}

    # No RNGs supplied: build a container sized by max_num_threads and seed
    # both generators from less repeatable factors (e.g., time).
    if randomness is None:
        randomness = C_GP.RandomnessSourceContainer(max_num_threads)
        randomness.SetRandomizedUniformGeneratorSeed(0)
        randomness.SetRandomizedNormalRNGSeed(0)

    # Gather the C++-side inputs in the positional order the wrapped function takes them.
    optimizer_parameters = ei_optimizer.optimizer_parameters
    ei_evaluator = ei_optimizer.objective_function
    cpp_gaussian_process = ei_evaluator._gaussian_process._gaussian_process
    # NOTE(review): this reads the private ``_domain_bounds`` attribute, whereas
    # multistart_expected_improvement_optimization() reads ``domain_bounds``;
    # presumably equivalent -- confirm before unifying.
    cpp_domain_bounds = cpp_utils.cppify(ei_optimizer.domain._domain_bounds)

    flat_heuristic_points = C_GP.heuristic_expected_improvement_optimization(
        optimizer_parameters,
        cpp_gaussian_process,
        cpp_domain_bounds,
        estimation_policy,
        num_to_sample,
        ei_evaluator._best_so_far,
        max_num_threads,
        randomness,
        status,
    )

    # Reshape the C++ output into num_to_sample points, each with
    # ei_optimizer.objective_function.dim coordinates.
    output_shape = (num_to_sample, ei_optimizer.objective_function.dim)
    return cpp_utils.uncppify(flat_heuristic_points, output_shape)