Example #1
 def predict(params, x, y, xstar):
     """Returns the predictive mean and covariance at locations xstar,
        of the latent function value f (without observation noise)."""
     mean, cov_params, noise_scale = unpack_kernel_params(params)
     cov_f_f = cov_func(cov_params, xstar, xstar)
     cov_y_f = cov_func(cov_params, x, xstar)
     cov_y_y = cov_func(cov_params, x, x) + noise_scale * np.eye(len(y))
     pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y - mean)
     pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
     return pred_mean, pred_cov
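This is the standard GP posterior predictive: with cov_y_y = K(x, x) + noise_scale * I, the latent values at xstar are Gaussian with mean m + K(x, xstar)^T cov_y_y^-1 (y - m) and covariance K(xstar, xstar) - K(x, xstar)^T cov_y_y^-1 K(x, xstar). A minimal, self-contained usage sketch; the RBF cov_func and the parameter layout in unpack_kernel_params below are hypothetical stand-ins for the example's own definitions, which are not shown:

import numpy as np
from numpy.linalg import solve

def cov_func(cov_params, x, xp):
    # hypothetical RBF kernel; cov_params = [log output scale, log length scale]
    output_scale, length_scale = np.exp(cov_params)
    return output_scale * np.exp(-0.5 * ((x[:, None] - xp[None, :]) / length_scale) ** 2)

def unpack_kernel_params(params):
    # hypothetical layout: [mean, log noise scale, kernel params...]
    return params[0], params[2:], np.exp(params[1])

x = np.linspace(-3, 3, 20)
y = np.sin(x) + 0.1 * np.random.randn(20)
xstar = np.linspace(-5, 5, 100)
params = np.array([0.0, np.log(1e-2), 0.0, 0.0])
pred_mean, pred_cov = predict(params, x, y, xstar)  # predict as defined above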
Example #3
 def conditional(x, y, xstar):
     """Returns the predictive mean and covariance at locations xstar,
        of the latent function value f (without observation noise).
        Assumes a zero-mean prior; y is the observed data."""
     # noise_scale and tol are assumed to be defined in the enclosing scope
     cov_f_f = RBF(xstar, xstar)
     cov_y_f = RBF(x, xstar)
     cov_y_y = RBF(x, x) + (noise_scale + tol) * np.eye(len(y))
     pred_mean = np.dot(solve(cov_y_y, cov_y_f).T, y)
     pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f) + tol * np.eye(len(xstar))
     return pred_mean, pred_cov
Example #4
def predict(params, x, y, xstar):
    """ Returns the predictive mean f(xstar) and covariance f(xstar)"""
    mean, cov_params, noise_scale = unpack_kernel_params(params)

    K_ff = covariance(cov_params, xstar, xstar)
    K_yf = covariance(cov_params, x, xstar)
    K_yy = covariance(cov_params, x, x) + noise_scale * np.eye(len(y))

    pred_mean = mean + np.dot(solve(K_yy, K_yf).T, y - mean)
    pred_cov = K_ff - np.dot(solve(K_yy, K_yf).T, K_yf)

    return pred_mean, pred_cov
Example #5
def mvnlogpdf(x, mu, L):
    """
    Not exactly a logpdf: the weights must be used to keep track of
    normalizing factors that differ across clusters.

    L is the Cholesky decomposition of the covariance matrix.
    """
    D = L.shape[0]
    logdet = 2 * np.sum(np.log(np.diagonal(L)))
    quad = np.inner(x - mu, solve(L.T, solve(L, x - mu)))
    return -0.5 * (D * np.log(2 * np.pi) + logdet + quad)
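Since L is the Cholesky factor of the full covariance C = L L^T, the returned value (normalizing constant included) can be checked against scipy's reference implementation. A quick sketch, assuming mvnlogpdf and a suitable solve are in scope:

import numpy as np
from numpy.linalg import cholesky
from scipy.stats import multivariate_normal

D = 4
A = np.random.randn(D, D)
C = A @ A.T + D * np.eye(D)  # random positive-definite covariance
L = cholesky(C)              # lower triangular, C = L L^T
x, mu = np.random.randn(D), np.zeros(D)
assert np.isclose(mvnlogpdf(x, mu, L), multivariate_normal.logpdf(x, mu, C))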
Example #6
    def cache(self):
        assert hasattr(self, "inputs")
        assert hasattr(self, "targets")
        x = np.atleast_2d(self.inputs)
        y = np.atleast_2d(self.targets)
        assert len(x) == len(y)

        n, D = x.shape
        n, E = y.shape

        self.K = self.kernel(self.hyp, x)  # [E, n, n]
        self.iK = np.stack([solve(self.K[i], np.eye(n)) for i in range(E)])
        self.alpha = np.vstack([solve(self.K[i], y[:, i]) for i in range(E)]).T
Example #7
 def predict(params, xstar, with_noise = False, FITC = False):
     """Returns the predictive mean and covariance at locations xstar,
        of the latent function value f (without observation noise)."""
     mean, cov_params, noise_scale, x0, y0 = unpack_gp_params(params)
     cov_f_f = cov_func(cov_params, xstar, xstar)
     cov_y_f = cov_func(cov_params, x0, xstar)
     cov_y_y = cov_func(cov_params, x0, x0) + noise_scale * np.eye(len(y0))
     pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y0 - mean)
     pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
     if FITC:
         pred_cov = np.diag(np.diag(pred_cov))
     if with_noise:
         pred_cov = pred_cov + noise_scale*np.eye(len(xstar))
     return pred_mean, pred_cov
Example #9
    def gp2(self, m, s):
        assert hasattr(self, "hyp")
        self.cache()

        x = np.atleast_2d(self.inputs)
        y = np.atleast_2d(self.targets)
        n, D = x.shape
        n, E = y.shape

        X = self.hyp
        beta = self.alpha

        m = np.atleast_2d(m)
        inp = x - m

        # Compute the predicted mean and IO covariance.
        iL = np.stack([np.diag(exp(-X[i, :D])) for i in range(E)])
        iN = np.matmul(inp, iL)
        B = iL @ s @ iL + np.eye(D)
        t = np.stack([solve(B[i].T, iN[i].T).T for i in range(E)])
        q = exp(-np.sum(iN * t, 2) / 2)
        qb = q * beta.T
        tiL = np.matmul(t, iL)
        c = exp(2 * X[:, D]) / sqrt(det(B))

        M = np.sum(qb, 1) * c
        V = (np.transpose(tiL, [0, 2, 1]) @ np.expand_dims(qb, 2)).reshape(
            E, D).T * c
        k = 2 * X[:, D].reshape(E, 1) - np.sum(iN**2, 2) / 2

        # Compute the predicted covariance.
        inp = np.expand_dims(inp, 0) / np.expand_dims(exp(2 * X[:, :D]), 1)
        ii = np.repeat(inp[:, newaxis, :, :], E, 1)
        ij = np.repeat(inp[newaxis, :, :, :], E, 0)

        iL = np.stack([np.diag(exp(-2 * X[i, :D])) for i in range(E)])
        siL = np.expand_dims(iL, 0) + np.expand_dims(iL, 1)
        R = np.matmul(s, siL) + np.eye(D)
        t = 1 / sqrt(det(R))
        iRs = np.stack(
            [solve(R.reshape(-1, D, D)[i], s) for i in range(E * E)])
        iRs = iRs.reshape(E, E, D, D)
        Q = exp(k[:, newaxis, :, newaxis] + k[newaxis, :, newaxis, :] +
                maha(ii, -ij, iRs / 2))

        S = t * np.einsum('ji,iljk,kl->il', beta, Q, beta) + 1e-6 * np.eye(E)
        S = S - np.matmul(M[:, newaxis], M[newaxis, :])

        return M, S, V
Example #10
def gain(P, A, B, Q):
    n, m = B.shape
    AB = np.hstack([A, B])
    H = np.dot(AB.T, np.dot(P, AB)) + Q
    Hux = H[n:n + m, 0:n]
    Huu = H[n:n + m, n:n + m]
    K = -la.solve(Huu, Hux)
    return K
Example #11
def ricc(P, A, B, Q):
    n, m = B.shape
    AB = np.hstack([A, B])
    H = np.dot(AB.T, np.dot(P, AB)) + Q
    Hxx = H[0:n, 0:n]
    Hxu = H[0:n, n:n + m]
    Hux = H[n:n + m, 0:n]
    Huu = H[n:n + m, n:n + m]
    return Hxx - np.dot(Hxu, la.solve(Huu, Hux))
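Both helpers operate on blocks of H = [A B]^T P [A B] + Q, where Q is the (n+m) x (n+m) stage-cost matrix on [x; u]. Iterating ricc to a fixed point solves the discrete algebraic Riccati equation, after which gain yields the infinite-horizon LQR feedback. A minimal sketch with a hypothetical double-integrator system:

import numpy as np
import numpy.linalg as la

A = np.array([[1.0, 0.1],
              [0.0, 1.0]])
B = np.array([[0.0],
              [0.1]])
n, m = B.shape
Q = np.eye(n + m)  # block-diagonal cost on state and input

P = np.eye(n)
for _ in range(1000):  # value iteration on the Riccati operator
    P_next = ricc(P, A, B, Q)
    if la.norm(P_next - P) < 1e-10:
        break
    P = P_next

K = gain(P, A, B, Q)  # optimal feedback u = K x
print(np.abs(la.eigvals(A + B @ K)).max())  # spectral radius < 1 once stabilized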
Example #12
 def calc_step_direction(self, x, obj, state_aux):
     method = self.setting.step_method
     if method == 'gradient':
         return -obj.gradient(x)
     elif method == 'newton':
         H = obj.hessian(x)
         B = posdefify(H, self.setting.pos_hess_eps)
         return -la.solve(B, obj.gradient(x))
     else:
         raise ValueError('Invalid step method!')
Example #13
    def loss_sat(self, m, s):
        D = len(m)

        W = self.W if hasattr(self, 'W') else np.eye(D)
        z = self.z if hasattr(self, 'z') else np.zeros(D)
        m, z = np.atleast_2d(m), np.atleast_2d(z)

        sW = np.dot(s, W)
        ispW = solve((np.eye(D) + sW).T, W.T).T
        L = -exp(-(m - z) @ ispW @ (m - z).T / 2) / sqrt(det(np.eye(D) + sW))

        i2spW = solve((np.eye(D) + 2 * sW).T, W.T).T
        r2 = exp(-(m - z) @ i2spW @ (m - z).T) / sqrt(det(np.eye(D) + 2 * sW))
        S = r2 - L**2

        t = np.dot(W, z.T) - ispW @ (np.dot(sW, z.T) + m.T)
        C = L * t

        return L + 1, S, C
Example #14
    def predict_full(params, x, y, xstar, weights):
        """Returns the predictive mean and covariance at locations xstar,
           of the latent function value f (without observation noise)."""
        mean, cov_params, noise_variance = unpack_kernel_params(params)
        cov_f_f = cov_func(cov_params, xstar, xstar)
        cov_y_f = cov_func(cov_params, x, xstar)
        cov_y_y = cov_func(cov_params, x, x) + \
            np.diag(noise_variance / weights)

        z = solve(cov_y_y, cov_y_f).T
        pred_mean = mean + np.dot(z, (y - mean))
        pred_cov = cov_f_f - np.dot(z, cov_y_f)
        return pred_mean, pred_cov
Example #15
    def elbo(y, phi, lam, pi, psi, sigma2s, mus, Sigmas, kernel_params):
        """
        phi [N, K] sample membership (cell line cluster)
        lam [G, L] feature membership (expression cluster)
        pi [K] sample mixture weight
        psi [L] feature mixture weights
        y [N, G, T] data
        mus [K, L, T] means
        sigma2s [K, L] observation noise variances
        Sigmas [K, L, T, T] posterior covariances
        """
        """
        conditional = np.array([list(map(
            lambda f, s: norm.logpdf(y, f, s).sum(axis=-1), Q[:, :-1], Q[:, -1]))
            for Q in np.concatenate([mus, sigma2s[:, :, np.newaxis]], 2)])

        conditional = conditional + np.log(mix)[:, :, np.newaxis, np.newaxis]
        assignments = np.einsum('nk, gl->klng', phi, lam)
        likelihood = np.sum(conditional * assignments)
        """

        likelihood = 0
        # data likelihood
        for l in range(L):
            for k in range(K):
                ll = np.sum(np.nan_to_num(norm.logpdf(
                    y, mus[k, l], np.sqrt(sigma2s[k, l]))), axis=-1)
                ll = ll - 0.5 * (np.trace(Sigmas[k, l] / sigma2s[k, l]))
                ll = ll * phi[:, k][:, np.newaxis]
                ll = ll * lam[:, l]
                likelihood = likelihood + np.sum(ll)

        # assignment likelihood
        likelihood = likelihood + np.sum(np.log(pi) * phi)
        likelihood = likelihood + np.sum(np.log(psi) * lam)

        # function likelihood
        for k in range(K):
            for l in range(L):
                Ker = cov_func(kernel_params[k, l], inputs, inputs)
                likelihood = likelihood \
                    + mvn.logpdf(mus[k, l], np.zeros(T), Ker) \
                    - 0.5 * np.trace(solve(Ker, Sigmas[k, l]))

        entropy = np.sum(list(map(multinomial_entropy, phi)) +
                         list(map(multinomial_entropy, lam)))
        for k in range(K):
            for l in range(L):
                entropy = entropy + mvn.entropy(mus[k, l], Sigmas[k, l])

        return likelihood + entropy
Example #16
    def log_gp_prior(y_bnn, x):
        """ computes: the expectation value of the log of the gp prior:
        E[log p_gp(f)] where p_gp(f) = N(f|0,K) and f ~ p_BNN(f)
        = -0.5 * E[(L^-1 f)^T (L^-1 f)] + const; K = LL^T (Cholesky decomposition)
        (constants are ignored for now since the covariance hyperparameters are not being optimized)

        bnn_weights                   |  dim = [N_weights_samples, N_weights]
        K = covariance/kernel matrix  |  dim = [N_data, N_data] ; dim L = dim K
        y_bnn output of a bnn         |  dim = [N_data, N_weights_samples]
        returns : E[log p_gp(y)]      |  dim = [N_function_samples] """

        K = covariance(x, x) + noise_var * np.eye(len(x))  # shape [N_data, N_data]
        L = cholesky(K)                                    # K = LL^T ; shape of L = shape of K
        a = solve(L, y_bnn)                                # a = L^-1 y_bnn ; shape = shape of y_bnn
        log_gp = -0.5 * np.mean(a**2, axis=0)              # compute E[a^2]
        return log_gp
Example #17
    def log_gp_prior(f_bnn, x, t):
        """ computes: the expectation value of the log of the gp prior:
        E_{X~p(X)} [log p_gp(f)] where p_gp(f) = N(f|0,K) and f ~ p_BNN(f)
        = -0.5 * E_{X~p(X)} [ (L^-1 f)^T (L^-1 f) ] + const; K = LL^T (Cholesky decomposition)
        (constants are ignored for now since the covariance hyperparameters are not being optimized)

        bnn_weights                   |  dim = [N_weights_samples, N_weights]
        K = covariance/kernel matrix  |  dim = [N_data, N_data] ; dim L = dim K
        f_bnn output of a bnn         |  dim = [N_data, N_weights_samples]
        returns : E[log p_gp(f)]      |  dim = [N_function_samples] """

        s = 1e-6 * np.eye(len(x))  # jitter for numerical stability
        K = covariance(x, x) + s  # shape [N_data, N_data]
        L = cholesky(K)  # K = LL^T (the jitter belongs in K, not in its factor L)
        a = solve(L, f_bnn)  # shape = shape of f_bnn (L^-1 f_bnn)
        log_gp = -0.5 * np.mean(a**2, axis=0)  # Monte Carlo estimate of E_{X~p(X)}[a^2]
        return log_gp
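A sketch of driving this term, with a hypothetical RBF standing in for covariance (the example's own definition is not shown); each column of f_bnn is one sampled function, and each gets its own estimate:

import numpy as np
from numpy.linalg import cholesky, solve

def covariance(x, xp):
    # hypothetical RBF kernel on 1-D inputs
    return np.exp(-0.5 * (x[:, None] - xp[None, :]) ** 2)

x = np.linspace(-3, 3, 50)       # N_data inputs
f_bnn = np.random.randn(50, 10)  # 10 function samples from a stand-in BNN
vals = log_gp_prior(f_bnn, x, None)  # t is unused in the snippet above
print(vals.shape)                # (10,): one value per function sample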
Example #18
    def log_pdf(self, hyp):
        x = np.atleast_2d(self.inputs)
        y = np.atleast_2d(self.targets)

        n, D = x.shape
        n, E = y.shape

        hyp = hyp.reshape(E, -1)
        K = self.kernel(hyp, x)  # [E, n, n]
        L = cholesky(K)
        alpha = np.hstack([solve(K[i], y[:, i]) for i in range(E)])
        y = y.flatten(order='F')

        # note: all three terms enter with a positive sign, so this is the
        # negative log marginal likelihood (the quantity to minimize over hyp)
        logp = 0.5 * n * E * log(2 * np.pi) + 0.5 * np.dot(y, alpha) + np.sum(
            [log(np.diag(L[i])) for i in range(E)])

        return logp
Example #19
    def predict(params,
                x,
                y,
                xstar,
                weights=None,
                condense=True,
                prediction_noise=True):
        """Returns the predictive mean and covariance at locations xstar,
           of the latent function value f (without observation noise)."""

        n, t = y.shape

        if weights is None:
            weights = np.ones(n)

        if not condense:
            return predict_full(params, np.tile(x, n), y.flatten(), xstar,
                                np.tile(weights, (x.size, 1)).T.flatten())

        mean, cov_params, noise_variance = unpack_kernel_params(params)

        if n == 0:
            # no data, return the prior
            prior_mean = mean * np.ones(xstar.size)
            prior_covariance = cov_func(cov_params, xstar, xstar)
            return prior_mean, prior_covariance

        y_bar = np.dot(weights, y)
        weights_full = (np.logical_not(np.isnan(y)) *
                        weights[:, np.newaxis]).sum(axis=0)

        cov_f_f = cov_func(cov_params, xstar, xstar)
        cov_y_f = weights_full[:, np.newaxis] * cov_func(cov_params, x, xstar)

        cov_y_y = np.outer(weights_full, weights_full) * \
            cov_func(cov_params, x, x) + \
            noise_variance * np.diag(weights_full)

        z = solve(cov_y_y, cov_y_f).T
        pred_mean = mean + np.dot(z, y_bar - mean).flatten()
        pred_cov = cov_f_f - np.dot(z, cov_y_f)
        if prediction_noise:
            pred_cov = pred_cov + noise_variance * np.eye(xstar.size)
        return pred_mean, pred_cov
Example #20
def check_are(K, A, B, Q, verbose=True):
    n, m = B.shape
    AB = np.hstack([A, B])
    PK = mat(calc_vPK(K, A, B, Q))
    H = np.dot(AB.T, np.dot(PK, AB)) + Q
    Hxx = H[0:n, 0:n]
    Huu = H[n:n + m, n:n + m]
    Hux = H[n:n + m, 0:n]
    LHS = PK
    RHS = Hxx - np.dot(Hux.T, la.solve(Huu, Hux))
    diff = la.norm(LHS - RHS)
    if verbose:
        print(' Left-hand side of the ARE: Positive definite = %s' %
              is_pos_def(LHS))
        print(LHS)
        print('')
        print('Right-hand side of the ARE: Positive definite = %s' %
              is_pos_def(RHS))
        print(RHS)
        print('')
        print('Difference')
        print(LHS - RHS)
        print('\n')
    return diff
Example #21
def calc_vPK(K, A, B, Q):
    n, m = B.shape
    AK = calc_AK(K, A, B)
    QK = calc_QK(K, Q)
    vQK = vec(QK)
    return la.solve(np.eye(n * n) - np.kron(AK.T, AK.T), vQK)
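The identity at work is vec(AK^T PK AK) = kron(AK^T, AK^T) vec(PK), so the solve returns the PK satisfying the discrete Lyapunov equation PK = AK^T PK AK + QK. A sketch checking that against scipy, with fixed stand-ins for AK and QK since calc_AK and calc_QK are not shown:

import numpy as np
import numpy.linalg as la
from scipy.linalg import solve_discrete_lyapunov

AK = np.array([[0.9, 0.1, 0.0],
               [0.0, 0.8, 0.1],
               [0.0, 0.0, 0.7]])  # a stable closed-loop matrix
QK = np.eye(3)
n = 3

vPK = la.solve(np.eye(n * n) - np.kron(AK.T, AK.T), QK.flatten())
PK_scipy = solve_discrete_lyapunov(AK.T, QK)  # solves AK^T X AK - X + QK = 0
assert np.allclose(vPK.reshape(n, n), PK_scipy)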
Example #22
def plot_gp_posterior(x, xtest, y, s=1e-4, samples=10, title="", plot='gp'):
    N = len(x)
    print(N)
    n = len(xtest)
    K = covariance(x, x) + s * np.eye(N)
    print(K.shape)
    L = cholesky(K)

    # compute the mean at our test points.
    Lk = solve(L, covariance(x, xtest))
    mu = np.dot(Lk.T, solve(L, y))

    # compute the variance at our test points.
    K_ = covariance(xtest, xtest)
    var = np.diag(K_) - np.sum(Lk**2, axis=0)
    std = np.sqrt(var)

    # draw samples from the prior at our test points.
    L = cholesky(K_ + s * np.eye(n))
    f_prior = np.dot(L, np.random.normal(size=(n, samples)))

    L = cholesky(K_ + s * np.eye(n) - np.dot(Lk.T, Lk))
    f_post = mu + np.dot(L, np.random.normal(size=(n, samples)))

    # --------------------------PLOTTING--------------------------------

    # PLOT PRIOR
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)

    ax.plot(x, y, 'ko', ms=4)

    # Get critical values for the deciles
    lvls = 0.1 * np.linspace(1, 9, 9)
    alphas = 1 - 0.5 * lvls
    zs = norm.ppf(alphas)
    pal = pal_col[plot]
    cols = colors[plot]

    print(f_prior.shape)
    print(f_post.shape)
    # plot samples, mean and deciles
    mean = np.mean(f_prior, axis=1)
    std = np.std(f_prior, axis=1)
    ax.plot(xtest, f_prior, sns.xkcd_rgb[sample_col[plot]], lw=1)

    ax.plot(xtest, mean, sns.xkcd_rgb[cols[0]], lw=1)
    print(xtest.shape, mean.shape, std.shape)
    for z, col in zip(zs, pal):
        ax.fill_between(xtest.ravel(),
                        mean - z * std,
                        mean + z * std,
                        color=col)

    plt.tick_params(labelbottom=False)
    plt.xlim([-8, 8])
    plt.legend()
    plt.savefig(title + "GP prior_draws.pdf", bbox_inches='tight')

    # PLOT POSTERIOR
    plt.clf()
    std = np.sqrt(var)
    fig = plt.figure()
    bx = fig.add_subplot(111)

    bx.plot(x, y, 'ko', ms=4)
    # plot samples, mean and deciles
    bx.plot(xtest, f_post, sns.xkcd_rgb[sample_col[plot]], lw=1)
    # bx.plot(xtest, mu, sns.xkcd_rgb[cols[0]], lw=1)
    print(xtest.shape, mu.shape, std.shape)
    mu = mu.ravel()
    #for z, col in zip(zs, pal):
    #    bx.fill_between(xtest.ravel(), mu - z * std, mu + z * std, color=col)

    plt.tick_params(labelbottom=False)
    plt.xlim([-8, 8])
    plt.ylim([-2, 3])
    plt.legend()
    plt.savefig(title + "GP post_draws.pdf", bbox_inches='tight')
Example #23
 def check_forward(L, x, trans, lower):
     ans1 = solve(T(L) if trans in (1, 'T') else L, x)
     ans2 = solve_triangular(L, x, lower=lower, trans=trans)
     assert np.allclose(ans1, ans2)
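A sketch of exercising it, assuming T is a transpose helper and solve/solve_triangular come from scipy.linalg; L must actually be triangular on the side that lower indicates for the two answers to agree:

import numpy as np
from numpy.linalg import cholesky
from scipy.linalg import solve, solve_triangular

T = np.transpose  # stand-in for the T helper used above

L = cholesky(5 * np.eye(5) + np.ones((5, 5)))  # lower-triangular factor
x = np.random.randn(5, 3)
for trans in (0, 1, 'T'):
    check_forward(L, x, trans, lower=True)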
Example #24
    def calc_step(self, x, trust_radius, obj):
        tags = []
        method = self.setting.step_method
        if method == 'dogleg':
            n = x.size
            g = obj.gradient(x)
            H = obj.hessian(x)
            B = posdefify(H, self.setting.pos_hess_eps)

            # Find the minimizing tau along the dogleg path
            pU = -(np.dot(g, g) / np.dot(g, np.dot(B, g))) * g
            pB = -la.solve(B, g)
            dp = pB - pU
            if la.norm(pB) <= trust_radius:
                # Minimum of model lies inside the trust region
                p = np.copy(pB)
            else:
                # Minimum of model lies outside the trust region
                tau_U = trust_radius / la.norm(pU)
                if tau_U <= 1:
                    # First dogleg segment intersects trust region boundary
                    p = tau_U * pU
                else:
                    # Second dogleg segment intersects trust region boundary
                    aa = np.dot(dp, dp)
                    ab = 2 * np.dot(dp, pU)
                    ac = np.dot(pU, pU) - trust_radius**2
                    alphas = quadratic_formula(aa, ab, ac)
                    alpha = np.max(alphas)
                    p = pU + alpha * dp
            return p, tags

        elif method == '2d_subspace':
            g = obj.gradient(x)
            H = obj.hessian(x)
            B = posdefify(H, self.setting.pos_hess_eps)

            # Project g and B onto the 2D-subspace spanned by (normalized versions of) -g and -B^-1 g
            s1 = -g
            s2 = -la.solve(B, g)
            Sorig = np.vstack([s1, s2]).T
            S, Rtran = la.qr(
                Sorig
            )  # This is necessary for us to use same trust_radius before/after transforming
            g2 = np.dot(S.T, g)
            B2 = np.dot(S.T, np.dot(B, S))

            # Solve the 2D trust-region subproblem
            try:
                R, lower = cho_factor(B2)
                p2 = -cho_solve((R, lower), g2)
                p22 = np.dot(p2, p2)
                if p22 <= trust_radius**2:
                    p = np.dot(S, p2)
                    return p, tags
            except LinAlgError:
                pass

            a = B2[0, 0] * trust_radius**2
            b = B2[0, 1] * trust_radius**2
            c = B2[1, 1] * trust_radius**2

            d = g2[0] * trust_radius
            f = g2[1] * trust_radius

            coeffs = np.array(
                [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
            t = np.roots(coeffs)  # Can handle leading zeros
            t = np.real(t[np.isreal(t)])

            p2 = trust_radius * np.vstack(
                (2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
            value = 0.5 * np.sum(p2 * np.dot(B2, p2), axis=0) + np.dot(g2, p2)
            i = np.argmin(value)
            p2 = p2[:, i]

            # Project back into the original n-dim space
            p = np.dot(S, p2)
            return p, tags

        elif method == 'cg_steihaug':
            # Settings
            max_iters = 100000  # TODO put in settings

            # Init
            n = x.size
            g = obj.gradient(x)
            B = obj.hessian(x)

            z = np.zeros(n)
            r = np.copy(g)
            d = -np.copy(g)

            # Choose eps according to Algo 7.1
            grad_norm = la.norm(g)
            eps = min(0.5, grad_norm**0.5) * grad_norm

            if la.norm(r) < eps:
                p = np.zeros(n)
                tags.append('Stopping tolerance reached!')
                return p, tags

            j = 0
            while j + 1 < max_iters:
                # Check if 'd' is a direction of non-positive curvature
                dBd = np.dot(d, np.dot(B, d))
                rr = np.dot(r, r)
                if dBd <= 0:
                    ta = np.dot(d, d)
                    tb = 2 * np.dot(d, z)
                    tc = np.dot(z, z) - trust_radius**2
                    taus = quadratic_formula(ta, tb, tc)
                    tau = np.max(taus)
                    p = z + tau * d
                    tags.append('Negative curvature encountered!')
                    return p, tags

                alpha = rr / dBd
                z_new = z + alpha * d

                # Check if trust region bound violated
                if la.norm(z_new) >= trust_radius:
                    ta = np.dot(d, d)
                    tb = 2 * np.dot(d, z)
                    tc = np.dot(z, z) - trust_radius**2
                    taus = quadratic_formula(ta, tb, tc)
                    tau = np.max(taus)
                    p = z + tau * d
                    tags.append('Trust region boundary reached!')
                    return p, tags

                z = np.copy(z_new)
                r = r + alpha * np.dot(B, d)
                rr_new = np.dot(r, r)

                if la.norm(r) < eps:
                    p = np.copy(z)
                    tags.append('Stopping tolerance reached!')
                    return p, tags

                beta = rr_new / rr
                d = -r + beta * d

                j += 1

            p = np.zeros(n)
            tags.append(
                'ALERT!  CG-Steihaug failed to solve trust-region subproblem within max_iters'
            )
            return p, tags
        else:
            raise ValueError('Invalid step method!')
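All three branches assume an obj exposing gradient and hessian. A hypothetical quadratic test problem matching that interface; on f(x) = 0.5 x^T A x - b^T x with A positive definite, any of the methods should recover the Newton step A^-1 b once trust_radius is large enough:

import numpy as np
import numpy.linalg as la

class QuadObj:
    # f(x) = 0.5 x^T A x - b^T x, so gradient = A x - b and hessian = A
    def __init__(self, A, b):
        self.A, self.b = A, b
    def gradient(self, x):
        return self.A @ x - self.b
    def hessian(self, x):
        return self.A

obj = QuadObj(np.diag([1.0, 10.0]), np.array([1.0, 1.0]))
x0 = np.zeros(2)
# p, tags = stepper.calc_step(x0, trust_radius=100.0, obj=obj)
# np.allclose(x0 + p, la.solve(obj.hessian(x0), obj.b))  # step reaches the minimizer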
Example #25
def get_gain(A, B, Q, R, S):
    P = care(A, B, Q, R, S)
    K = -la.solve(R, B.T.dot(P) + S.T)
    return K
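If care is not defined elsewhere, scipy's continuous-time ARE solver can stand in for it (it accepts the cross-term S directly). A sketch on a double integrator:

import numpy as np
import numpy.linalg as la
from scipy.linalg import solve_continuous_are

def care(A, B, Q, R, S):
    # stand-in: scipy supports the cross-weighting term via s=
    return solve_continuous_are(A, B, Q, R, s=S)

A = np.array([[0.0, 1.0],
              [0.0, 0.0]])  # double integrator
B = np.array([[0.0],
              [1.0]])
Q, R, S = np.eye(2), np.eye(1), np.zeros((2, 1))
K = get_gain(A, B, Q, R, S)
print(np.real(la.eigvals(A + B @ K)))  # closed-loop poles in the left half-plane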
Example #26
def log_gp_prior(y_bnn, K):  # [nf, nd] [nd, nd]
    """ computes: log p_gp(f), f ~ p_BNN(f) """
    L = cholesky(K)
    a = solve(L, y_bnn.T)  # a = L^-1 y_bnn  [nf, nd]
    return -0.5 * np.mean(a**2, axis=0)  # [nf]