Example #1
import numpy as np
from scipy.linalg import solve_triangular


def log_gaussian_pdf(x, mu=None, Sigma=None, is_cholesky=False, compute_grad=False, cov_scaling=1.):
    D = len(x)
    
    if mu is None:
        mu = np.zeros(D)
        
    assert len(mu) == D

    if Sigma is not None:
        assert D == Sigma.shape[0]
        assert D == Sigma.shape[1]
    
        if is_cholesky is False:
            L = np.linalg.cholesky(Sigma)
        else:
            L = Sigma
        
        # solve y=K^(-1)x = L^(-T)L^(-1)x
        x = np.array(x - mu)
        y = solve_triangular(L, x.T, lower=True)
        y = solve_triangular(L.T, y, lower=False) / cov_scaling
        cov_L_diag = np.diag(L)
    else:
        # assume isotropic covariance, solve y = K^(-1)(x - mu)
        x = np.array(x - mu)
        y = x / cov_scaling
        cov_L_diag = np.ones(D)
        
    if not compute_grad:
        log_determinant_part = -np.sum(np.log(np.sqrt(cov_scaling) * cov_L_diag))
        quadratic_part = -0.5 * x.dot(y)
        const_part = -0.5 * D * np.log(2 * np.pi)
        
        return const_part + log_determinant_part + quadratic_part
    else:
        return -y
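
A quick sanity check for the function above, assuming NumPy and SciPy are available; it compares the result against scipy.stats.multivariate_normal for a random SPD covariance:

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
D = 3
x = rng.randn(D)
mu = rng.randn(D)
A = rng.randn(D, D)
Sigma = A.dot(A.T) + D * np.eye(D)  # symmetric positive definite

assert np.isclose(log_gaussian_pdf(x, mu, Sigma),
                  multivariate_normal.logpdf(x, mu, Sigma))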
Example #2
    def find_mode_newton(self, return_full=False):
        """
        Newton search for the mode of p(y|f)p(f).

        From GP book, algorithm 3.1, with an added step size.
        """
        K = self.gp.K

        if self.newton_start is None:
            f = zeros(len(K))
        else:
            f = self.newton_start

        if return_full:
            steps = [f]

        iteration = 0
        norm_difference = inf
        objective_value = -inf

        while iteration < self.newton_max_iterations and norm_difference > self.newton_epsilon:
            # from GP book, algorithm 3.1, added step size
            # scale log_lik_grad_vector and K^-1 f = a

            w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
            w_sqrt = sqrt(w)

            # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
            L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
            b = f * w + self.newton_step * self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

            # a=b-diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt).dot(diag(w_sqrt).dot(K.dot(b))))
            a = w_sqrt * (K.dot(b))
            a = solve_triangular(L, a, lower=True)
            a = solve_triangular(L.T, a, lower=False)
            a = w_sqrt * a
            a = b - a

            f_new = K.dot(self.newton_step * a)

            # convergence check and next iteration; the objective is
            # evaluated at the new iterate f_new (GP book, algorithm 3.1)
            objective_value_new = -0.5 * a.T.dot(f_new) + sum(self.gp.likelihood.log_lik_vector(self.gp.y, f_new))
            norm_difference = norm(f - f_new)

            if objective_value_new > objective_value:
                f = f_new
                if return_full:
                    steps.append(f)
            else:
                self.newton_step /= 2

            iteration += 1
            objective_value = objective_value_new

        self.computed = True

        if return_full:
            return f, L, asarray(steps)
        else:
            return f
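
The stabilized solve above relies on the identity (K^-1 + W)^-1 = K - K W^0.5 B^-1 W^0.5 K with B = I + W^0.5 K W^0.5 (GP book, eq. 3.27), which also appears in get_gaussian below. A minimal numerical check of that identity, assuming only NumPy; the names mirror the method above:

import numpy as np

rng = np.random.RandomState(0)
n = 5
A = rng.randn(n, n)
K = A.dot(A.T) + n * np.eye(n)     # random SPD "kernel" matrix
w = rng.rand(n) + 0.1              # positive weights, W = diag(w)
w_sqrt = np.sqrt(w)

# B = I + W^0.5 K W^0.5; (K.T * w_sqrt).T * w_sqrt == diag(w_sqrt).dot(K).dot(diag(w_sqrt))
B = np.eye(n) + (K.T * w_sqrt).T * w_sqrt

left = np.linalg.inv(np.linalg.inv(K) + np.diag(w))
right = K - K.dot(np.diag(w_sqrt)).dot(np.linalg.inv(B)).dot(np.diag(w_sqrt)).dot(K)
assert np.allclose(left, right)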
Example #3
from scipy.linalg import solve_triangular


def cholesky_solve(L, x):
    """
    Solves X^-1 x = (L L^T)^-1 x = L^-T L^-1 x for a given Cholesky
    factor L of X = L L^T.
    """
    x = solve_triangular(L, x.T, lower=True)
    x = solve_triangular(L.T, x, lower=False)
    return x
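
A short usage check, assuming NumPy; the two triangular solves should agree with a direct dense solve:

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
X = A.dot(A.T) + 4 * np.eye(4)   # SPD matrix
L = np.linalg.cholesky(X)
b = rng.randn(4)

assert np.allclose(cholesky_solve(L, b), np.linalg.solve(X, b))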
Example #5
from scipy.linalg import solve, solve_triangular
from scipy.sparse import isspmatrix
from scipy.sparse.linalg import spsolve

UPPER_TRIANGULAR = 'upper_triangular'


def _solve_P_Q(U, V, structure=None):
    """
    A helper function for expm_2009.

    Parameters
    ----------
    U : ndarray
        Pade numerator.
    V : ndarray
        Pade denominator.
    structure : str, optional
        A string describing the structure of both matrices `U` and `V`.
        Only `upper_triangular` is currently supported.

    Notes
    -----
    The `structure` argument is inspired by similar args
    for theano and cvxopt functions.

    """
    P = U + V
    Q = -U + V
    if isspmatrix(U):
        return spsolve(Q, P)
    elif structure is None:
        return solve(Q, P)
    elif structure == UPPER_TRIANGULAR:
        return solve_triangular(Q, P)
    else:
        raise ValueError('unsupported matrix structure: ' + str(structure))
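
For intuition: with U = A/2 and V = I, the solve Q^-1 P = (-U + V)^-1 (U + V) is exactly the [1/1] Pade approximant (I - A/2)^-1 (I + A/2) of exp(A). A minimal sketch under that assumption, for a small-norm dense matrix:

import numpy as np
from scipy.linalg import expm

A = np.array([[0.0, 0.01], [-0.01, 0.0]])   # small-norm matrix
U, V = A / 2, np.eye(2)
R = np.linalg.solve(-U + V, U + V)           # Q^-1 P, as in _solve_P_Q

assert np.allclose(R, expm(A), atol=1e-6)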
Example #7
    def log_pdf_multiple_points(self, X):
        assert len(shape(X)) == 2
        assert shape(X)[1] == self.dimension

        log_determinant_part = -sum(log(diag(self.L)))

        quadratic_parts = zeros(len(X))
        for i in range(len(X)):
            x = X[i] - self.mu

            # solve y = K^(-1)x = L^(-T)L^(-1)x
            y = solve_triangular(self.L, x.T, lower=True)
            y = solve_triangular(self.L.T, y, lower=False)
            quadratic_parts[i] = -0.5 * x.dot(y)

        const_part = -0.5 * len(self.L) * log(2 * pi)

        return const_part + log_determinant_part + quadratic_parts
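
The per-point loop above can be collapsed into a single triangular solve, since (x - mu)^T K^-1 (x - mu) = ||L^-1 (x - mu)||^2. A hedged sketch of the vectorized quadratic part, assuming NumPy/SciPy (the same idea applies to Example #10 below):

import numpy as np
from scipy.linalg import solve_triangular

def quadratic_parts_vectorized(X, mu, L):
    # Y[:, i] = L^-1 (X[i] - mu); one solve handles all rows of X
    Y = solve_triangular(L, (X - mu).T, lower=True)
    return -0.5 * np.sum(Y * Y, axis=0)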
Example #8
    def get_gaussian(self, f=None, L=None):
        if f is None or L is None:
            f, L, _ = self.find_mode_newton(return_full=True)

        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
        w_sqrt = sqrt(w)
        K = self.gp.K

        # gp book 3.27, matrix inversion lemma on
        # (K^-1 +W)^-1 = K -KW^0.5 B^-1 W^0.5 K
        C = (K.T * w_sqrt).T
        C = solve_triangular(L, C, lower=True)
        C = solve_triangular(L.T, C, lower=False)
        C = (C.T * w_sqrt).T
        C = K.dot(C)
        C = K - C

        return Gaussian(f, C, is_cholesky=False)
Example #10
    def log_pdf(self, X):
        assert len(shape(X)) == 2
        assert shape(X)[1] == self.dimension

        log_determinant_part = -sum(log(diag(self.L)))

        quadratic_parts = zeros(len(X))
        for i in range(len(X)):
            x = X[i] - self.mu

            # solve y=K^(-1)x = L^(-T)L^(-1)x
            y = solve_triangular(self.L, x.T, lower=True)
            y = solve_triangular(self.L.T, y, lower=False)
            quadratic_parts[i] = -0.5 * x.dot(y)

        const_part = -0.5 * len(self.L) * log(2 * pi)

        return const_part + log_determinant_part + quadratic_parts
Example #11
import numpy as np
from scipy.linalg import solve_triangular


def log_gaussian_pdf_multiple(X,
                              mu=None,
                              Sigma=None,
                              is_cholesky=False,
                              compute_grad=False,
                              cov_scaling=1.):
    D = X.shape[1]

    if mu is None:
        mu = np.zeros(D)

    assert len(mu) == D

    if Sigma is not None:
        assert D == Sigma.shape[0]
        assert D == Sigma.shape[1]

        if is_cholesky is False:
            L = np.linalg.cholesky(Sigma)
        else:
            L = Sigma

        # solve Y=K^(-1)(X-mu) = L^(-T)L^(-1)(X-mu)
        X = np.array(X - mu)

        Y = solve_triangular(L, X.T, lower=True)
        Y = solve_triangular(L.T, Y, lower=False) / cov_scaling
        Y = Y.T
        cov_L_diag = np.diag(L)
    else:
        # assume isotropic covariance, solve Y = K^(-1)(X - mu)
        X = np.array(X - mu)
        Y = X / cov_scaling

        cov_L_diag = np.ones(D)

    if not compute_grad:
        log_determinant_part = -np.sum(
            np.log(np.sqrt(cov_scaling) * cov_L_diag))
        quadratic_part = -0.5 * np.sum(X * Y, axis=1)
        const_part = -0.5 * D * np.log(2 * np.pi)

        return const_part + log_determinant_part + quadratic_part
    else:
        return -Y
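
A consistency check, assuming log_gaussian_pdf from Example #1 is in scope; the batch version should match a per-row loop over the single-point version:

import numpy as np

rng = np.random.RandomState(0)
D, N = 3, 5
mu = rng.randn(D)
A = rng.randn(D, D)
Sigma = A.dot(A.T) + D * np.eye(D)
X = rng.randn(N, D)

batch = log_gaussian_pdf_multiple(X, mu, Sigma)
single = np.array([log_gaussian_pdf(x, mu, Sigma) for x in X])
assert np.allclose(batch, single)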
Example #14
    def predict(self, X_test, f_mode=None):
        """
        Predictions for a GP with the Laplace approximation.

        From GP book, algorithm 3.2.
        """
        if f_mode is None:
            f_mode = self.find_mode_newton()

        predictions = zeros(len(X_test))

        K = self.gp.K
        K_train_test = self.gp.covariance.compute(self.gp.X, X_test)

        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f_mode)
        w_sqrt = sqrt(w)

        # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
        L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)

        # iterator for all testing points
        for i in range(len(X_test)):
            k = K_train_test[:, i]
            k_self = self.gp.covariance.compute([X_test[i]], [X_test[i]])[0]

            f_mean = k.dot(
                self.gp.likelihood.log_lik_grad_vector(self.gp.y, f_mode))
            v = solve_triangular(L, w_sqrt * k, lower=True)
            f_var = k_self - v.T.dot(v)

            # norm.pdf expects a standard deviation, not a variance; note that
            # integrating the bare Gaussian density evaluates to 1, so the
            # likelihood term of algorithm 3.2 still needs to enter the integrand
            predictions[i] = integrate.quad(
                lambda x: norm.pdf(x, f_mean, sqrt(f_var)), -inf, inf)[0]
#            # integrate over Gaussian using some crude numerical integration
#            samples=randn(1000)*sqrt(f_var) + f_mean
#
#            log_liks=self.gp.likelihood.log_lik_vector(1.0, samples)
#            predictions[i]=1.0/len(samples)*GPTools.log_sum_exp(log_liks)

        return predictions
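
Per GP book algorithm 3.2, the averaged prediction is the expectation of the likelihood under N(f_mean, f_var). A minimal sketch of that expectation via Gauss-Hermite quadrature, using a logistic sigmoid as a stand-in likelihood (hypothetical here, since the snippet's likelihood object is not shown):

import numpy as np

def gaussian_expectation(g, mean, var, degree=20):
    # E[g(X)] for X ~ N(mean, var) via Gauss-Hermite quadrature
    nodes, weights = np.polynomial.hermite.hermgauss(degree)
    return weights.dot(g(mean + np.sqrt(2 * var) * nodes)) / np.sqrt(np.pi)

# e.g. expectation of a logistic sigmoid under N(0, 1)
print(gaussian_expectation(lambda x: 1 / (1 + np.exp(-x)), 0.0, 1.0))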
Example #16
    
    experiment_dir_base = str(sys.argv[1])
    n = int(str(sys.argv[2]))

    # loop over parameters here

    experiment_dir = experiment_dir_base + str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
    print("running experiments", n, "times at base", experiment_dir)

    # load data
    data, labels = GPData.get_glass_data()

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    distribution = PseudoMarginalHyperparameterDistribution(data, labels,
                                                            n_importance=100, prior=theta_prior,
                                                            ridge=1e-3)

    sigma = 23.0
    print("using sigma", sigma)
    kernel = GaussianKernel(sigma=sigma)

    for i in range(n):

        mcmc_samplers = []
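
The Cholesky whitening step above maps the data to identity sample covariance, since cov = L L^T implies cov(L^-1 x) = I. A small check of that property on toy data, assuming NumPy/SciPy:

import numpy as np
from numpy.linalg import cholesky
from scipy.linalg import solve_triangular

rng = np.random.RandomState(0)
data = rng.randn(500, 3).dot(rng.randn(3, 3))   # correlated toy data

data -= np.mean(data, 0)
L = cholesky(np.cov(data.T))
data = solve_triangular(L, data.T, lower=True).T

assert np.allclose(np.cov(data.T), np.eye(3))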
Example #17
if __name__ == '__main__':
    # load data
    data, labels = GPData.get_glass_data()

    # throw away some data
    n = 250
    seed(1)
    idx = permutation(len(data))
    idx = idx[:n]
    data = data[idx]
    labels = labels[idx]

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target = PseudoMarginalHyperparameterDistribution(data, labels,
                                                      n_importance=100, prior=theta_prior,
                                                      ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
    #    sampler=AdaptiveMetropolisLearnScale(target)
    #    sampler=StandardMetropolis(target)
Example #19
    def solve_system(self, A, b):
        # `debug` was removed from recent SciPy's solve_triangular; also return
        # the solution rather than relying on overwrite_b mutating b in place
        return solve_triangular(A, b, unit_diagonal=True, overwrite_b=True, lower=False)
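
A short usage sketch for the corrected call, assuming NumPy; unit_diagonal=True makes solve_triangular ignore A's diagonal entries and assume ones:

import numpy as np
from scipy.linalg import solve_triangular

A = np.triu(np.ones((3, 3)))   # upper triangular with unit diagonal
b = np.array([1.0, 2.0, 3.0])

x = solve_triangular(A, b, unit_diagonal=True, lower=False)
assert np.allclose(A.dot(x), b)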