Example #1
    def compute_variance_of_points(self, points_to_sample):
        r"""Compute the variance (matrix) of this GP at each point of ``Xs`` (``points_to_sample``).

        .. Warning:: ``points_to_sample`` should not contain duplicate points.

        The variance matrix is symmetric, although we currently return the full representation.

        .. Note:: Comments are copied from
          :meth:`moe.optimal_learning.python.interfaces.gaussian_process_interface.GaussianProcessInterface.compute_variance_of_points`

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: var_star: variance matrix of this GP
        :rtype: array of float64 with shape (num_to_sample, num_to_sample)

        """
        var_star = python_utils.build_covariance_matrix(self._covariance, points_to_sample)  # this is K_star_star
        if self.num_sampled == 0:
            return numpy.diag(numpy.diag(var_star))

        K_star = python_utils.build_mix_covariance_matrix(
            self._covariance,
            self._points_sampled,
            points_to_sample,
        )
        V = scipy.linalg.solve_triangular(
            self._K_chol[0],
            K_star,
            lower=self._K_chol[1],
            overwrite_b=True,
        )

        # cheaper to go through scipy.linalg.get_blas_funcs() which can compute A = alpha*B*C + beta*A in one pass
        var_star -= numpy.dot(V.T, V)
        return var_star
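
For orientation, here is a minimal standalone sketch of the same computation, ``K_** - K_*^T K^-1 K_*``, assuming a simple RBF kernel; the kernel, data, and names below are illustrative and are not part of the MOE API:

    # A minimal standalone sketch of GP posterior variance, assuming an RBF
    # kernel; the kernel, data, and names here are illustrative, not MOE API.
    import numpy
    import scipy.linalg
    import scipy.spatial.distance

    def rbf(XA, XB, ell=1.0):
        return numpy.exp(-0.5 * scipy.spatial.distance.cdist(XA, XB, 'sqeuclidean') / ell ** 2)

    rng = numpy.random.RandomState(0)
    X_train, X_test = rng.rand(15, 2), rng.rand(4, 2)

    K = rbf(X_train, X_train) + 1e-8 * numpy.eye(15)  # jitter for numerical stability
    K_chol = scipy.linalg.cho_factor(K, lower=True)
    K_star = rbf(X_train, X_test)                     # shape (num_sampled, num_to_sample)

    # V = L^-1 * K_star, so that V^T * V = K_star^T * K^-1 * K_star
    V = scipy.linalg.solve_triangular(K_chol[0], K_star, lower=K_chol[1])
    var_star = rbf(X_test, X_test) - numpy.dot(V.T, V)  # posterior covariance, shape (4, 4)
    print(var_star.shape)

The triangular solve against the Cholesky factor avoids forming ``K^-1`` explicitly, exactly as in the method above.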
Example #3
    def _build_precomputed_data(self):
        """Set up precomputed data (Cholesky factorization of K and K^-1 * y)."""
        if self.num_sampled == 0:
            self._K_chol = numpy.array([])
            self._K_inv_y = numpy.array([])
        else:
            covariance_matrix = python_utils.build_covariance_matrix(
                self._covariance,
                self._points_sampled,
                noise_variance=self._points_sampled_noise_variance,
            )
            self._K_chol = scipy.linalg.cho_factor(covariance_matrix, lower=True, overwrite_a=True)
            self._K_inv_y = scipy.linalg.cho_solve(self._K_chol, self._points_sampled_value)
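
The point of precomputing is that ``cho_factor`` costs O(n^3) and runs only once, after which each ``cho_solve`` against the stored factor is O(n^2) per right-hand side. A minimal sketch of the factor-once, solve-many pattern (the matrix and names are illustrative, not MOE's):

    # Factor once, then reuse the factor for any number of solves.
    import numpy
    import scipy.linalg

    rng = numpy.random.RandomState(1)
    A = rng.rand(100, 100)
    K = A.dot(A.T) + 100.0 * numpy.eye(100)  # SPD stand-in for the covariance matrix
    y = rng.randn(100)

    K_chol = scipy.linalg.cho_factor(K, lower=True)  # O(n^3), done once
    K_inv_y = scipy.linalg.cho_solve(K_chol, y)      # O(n^2) per right-hand side
    numpy.testing.assert_allclose(K.dot(K_inv_y), y, atol=1e-8)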
Example #5
    def compute_grad_log_likelihood(self):
        r"""Compute the gradient (wrt hyperparameters) of the _log_likelihood_type measure at the specified hyperparameters.

        .. NOTE:: These comments are copied from LogMarginalLikelihoodEvaluator::ComputeGradLogLikelihood in gpp_model_selection.cpp.

        Computes ``\pderiv{log(p(y | X, \theta))}{\theta_k} = \frac{1}{2} * \alpha_i * \pderiv{K_{ij}}{\theta_k} * \alpha_j - \frac{1}{2}``
        ``* trace(K^{-1}_{ij} * \pderiv{K_{ij}}{\theta_k})``
        Or equivalently, ``= \frac{1}{2} * trace([\alpha_i * \alpha_j - K^{-1}_{ij}] * \pderiv{K_{ij}}{\theta_k})``,
        where ``\alpha_i = K^{-1}_{ij} * y_j``.

        :return: grad_log_likelihood: i-th entry is ``\pderiv{LL(y | X, \theta)}{\theta_i}``
        :rtype: array of float64 with shape (num_hyperparameters)

        """
        covariance_matrix = python_utils.build_covariance_matrix(
            self._covariance,
            self._points_sampled,
            noise_variance=self._points_sampled_noise_variance,
        )
        K_chol = scipy.linalg.cho_factor(covariance_matrix, lower=True, overwrite_a=True)
        K_inv_y = scipy.linalg.cho_solve(K_chol, self._points_sampled_value)

        grad_hyperparameter_cov_matrix = python_utils.build_hyperparameter_grad_covariance_matrix(
            self._covariance,
            self._points_sampled,
        )
        grad_log_marginal = numpy.empty(self.num_hyperparameters)
        for k in range(self.num_hyperparameters):
            grad_cov_block = grad_hyperparameter_cov_matrix[..., k]
            # computing 0.5 * \alpha^T * grad_hyperparameter_cov_matrix * \alpha, where \alpha = K^-1 * y (aka K_inv_y)
            # temp_vec := grad_hyperparameter_cov_matrix * K_inv_y
            temp_vec = numpy.dot(grad_cov_block, K_inv_y)
            # computes 0.5 * K_inv_y^T * temp_vec
            grad_log_marginal[k] = 0.5 * numpy.dot(K_inv_y, temp_vec)

            # compute -0.5 * tr(K^-1 * dK/d\theta)
            temp = scipy.linalg.cho_solve(K_chol, grad_cov_block, overwrite_b=True)
            grad_log_marginal[k] -= 0.5 * temp.trace()
            # TODO(GH-180): this can be much faster if we form K^-1 explicitly (see below), but that is less accurate
            # grad_log_marginal[k] -= 0.5 * numpy.einsum('ij,ji', K_inv, grad_cov_block)

        return grad_log_marginal
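
As a sanity check on the formula above, the analytic gradient can be compared against a central finite difference of the log likelihood. A self-contained sketch, assuming a one-hyperparameter RBF kernel ``K_ij = exp(-0.5 * ||x_i - x_j||^2 / ell^2)``; none of these helper names come from the MOE codebase:

    # Self-contained check of the gradient formula, assuming a one-
    # hyperparameter RBF kernel; nothing here is from the MOE codebase.
    import numpy
    import scipy.linalg
    import scipy.spatial.distance

    def kernel_and_grad(X, ell, noise=1e-6):
        sq_dists = scipy.spatial.distance.cdist(X, X, 'sqeuclidean')
        K = numpy.exp(-0.5 * sq_dists / ell ** 2)
        dK_dell = K * sq_dists / ell ** 3  # \pderiv{K_{ij}}{ell}; noise term is ell-independent
        return K + noise * numpy.eye(X.shape[0]), dK_dell

    def log_likelihood(X, y, ell):
        K, _ = kernel_and_grad(X, ell)
        chol = scipy.linalg.cho_factor(K, lower=True)
        alpha = scipy.linalg.cho_solve(chol, y)
        return (-0.5 * numpy.inner(y, alpha)
                - numpy.log(chol[0].diagonal()).sum()
                - 0.5 * y.size * numpy.log(2.0 * numpy.pi))

    def grad_log_likelihood(X, y, ell):
        K, dK = kernel_and_grad(X, ell)
        chol = scipy.linalg.cho_factor(K, lower=True)
        alpha = scipy.linalg.cho_solve(chol, y)
        # 0.5 * \alpha^T * dK * \alpha - 0.5 * tr(K^-1 * dK), as in the docstring
        return 0.5 * alpha.dot(dK).dot(alpha) - 0.5 * scipy.linalg.cho_solve(chol, dK).trace()

    rng = numpy.random.RandomState(0)
    X, y = rng.rand(20, 3), rng.randn(20)
    ell, h = 0.7, 1e-6
    finite_diff = (log_likelihood(X, y, ell + h) - log_likelihood(X, y, ell - h)) / (2.0 * h)
    print(grad_log_likelihood(X, y, ell), finite_diff)  # should agree to several digits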
Example #7
    def _build_precomputed_data(self):
        """Set up precomputed data (cholesky factorization of K and K^-1 * y)."""
        if self.num_sampled == 0:
            self._K_chol = numpy.array([])
            self._K_inv_y = numpy.array([])
        else:
            covariance_matrix = python_utils.build_covariance_matrix(
                self._covariance,
                self._points_sampled,
                noise_variance=self._points_sampled_noise_variance,
            )

            C = self._build_integrated_term_maxtrix(self._covariance, self._points_sampled)
            self._K_Inv = numpy.linalg.inv(covariance_matrix)
            self._K_C = numpy.multiply(C, self._K_Inv)  # elementwise product with K^-1
            self._K_chol = scipy.linalg.cho_factor(covariance_matrix, lower=True, overwrite_a=True)
            self._K_inv_y = scipy.linalg.cho_solve(self._K_chol, self._points_sampled_value)
            self._marginal_mean_mat = self._build_marginal_matrix_mean()
            self._marginal_mean_mat_gradient = self._build_marginal_matrix_mean_gradient()
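
Note the contrast with the TODO in Example #5: here ``K^-1`` is formed explicitly with ``numpy.linalg.inv``. Since the Cholesky factor is computed anyway, an alternative (a sketch, not MOE code) reuses it by solving against the identity, which exploits symmetry and avoids a second factorization:

    import numpy
    import scipy.linalg

    rng = numpy.random.RandomState(2)
    A = rng.rand(30, 30)
    K = A.dot(A.T) + 30.0 * numpy.eye(30)  # SPD stand-in for the covariance matrix

    K_chol = scipy.linalg.cho_factor(K, lower=True)
    K_inv = scipy.linalg.cho_solve(K_chol, numpy.eye(30))  # K^-1 from the existing factor
    numpy.testing.assert_allclose(K_inv, numpy.linalg.inv(K), atol=1e-10)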
Example #8
    def compute_log_likelihood(self):
        r"""Compute the _log_likelihood_type measure at the specified hyperparameters.

        .. NOTE:: These comments are copied from LogMarginalLikelihoodEvaluator::ComputeLogLikelihood in gpp_model_selection.cpp.

        ``log p(y | X, \theta) = -\frac{1}{2} * y^T * K^-1 * y - \frac{1}{2} * \log(det(K)) - \frac{n}{2} * \log(2*pi)``
        where n is ``num_sampled``, ``\theta`` are the hyperparameters, and ``\log`` is the natural logarithm.  In the following,
        ``term1 = -\frac{1}{2} * y^T * K^-1 * y``
        ``term2 = -\frac{1}{2} * \log(det(K))``
        ``term3 = -\frac{n}{2} * \log(2*pi)``

        For an SPD matrix ``K = L * L^T``,
        ``det(K) = \Pi_i L_ii^2``
        We could compute this directly and then take a logarithm.  But we also know:
        ``\log(det(K)) = 2 * \sum_i \log(L_ii)``
        The latter method is (currently) preferred for computing ``\log(det(K))`` due to reduced chance of overflow
        and (possibly) better numerical conditioning.

        :return: value of log_likelihood evaluated at hyperparameters (``LL(y | X, \theta)``)
        :rtype: float64

        """
        covariance_matrix = python_utils.build_covariance_matrix(
            self._covariance,
            self._points_sampled,
            noise_variance=self._points_sampled_noise_variance,
        )
        K_chol = scipy.linalg.cho_factor(covariance_matrix, lower=True, overwrite_a=True)

        # term2 = -0.5 * log(det(K)) = -\sum_i log(L_ii)
        log_marginal_term2 = -numpy.log(K_chol[0].diagonal()).sum()

        K_inv_y = scipy.linalg.cho_solve(K_chol, self._points_sampled_value)
        # term1 = -0.5 * y^T * K^-1 * y
        log_marginal_term1 = -0.5 * numpy.inner(self._points_sampled_value, K_inv_y)

        # term3 = -n/2 * log(2*pi)
        log_marginal_term3 = -0.5 * numpy.float64(self._points_sampled_value.size) * numpy.log(2.0 * numpy.pi)
        return log_marginal_term1 + log_marginal_term2 + log_marginal_term3
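
A quick numerical illustration of the identity used for ``term2``, ``\log(det(K)) = 2 * \sum_i \log(L_ii)``, on an arbitrary SPD matrix (a sketch, not MOE code):

    # Compare the direct determinant route with the Cholesky-diagonal form.
    import numpy
    import scipy.linalg

    rng = numpy.random.RandomState(42)
    A = rng.rand(50, 50)
    K = A.dot(A.T) + 50.0 * numpy.eye(50)  # SPD by construction

    L = scipy.linalg.cholesky(K, lower=True)
    log_det_direct = numpy.log(numpy.linalg.det(K))     # det can overflow for larger K
    log_det_chol = 2.0 * numpy.log(L.diagonal()).sum()  # the stable form used above
    print(log_det_direct, log_det_chol)                 # should match closely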