Example 1
def pre_factor_kkt(Q, G, A):
    """ Perform all one-time factorizations and cache relevant matrix products"""
    nineq, nz, neq, _ = get_sizes(G, A)

    # S = [ A Q^{-1} A^T        A Q^{-1} G^T           ]
    #     [ G Q^{-1} A^T        G Q^{-1} G^T + D^{-1} ]

    U_Q = torch.potrf(Q)
    # partial cholesky of S matrix
    U_S = torch.zeros(neq + nineq, neq + nineq).type_as(Q)

    G_invQ_GT = torch.mm(G, torch.potrs(G.t(), U_Q))
    R = G_invQ_GT
    if neq > 0:
        invQ_AT = torch.potrs(A.t(), U_Q)
        A_invQ_AT = torch.mm(A, invQ_AT)
        G_invQ_AT = torch.mm(G, invQ_AT)

        # TODO: torch.potrf sometimes says the matrix is not PSD but
        # numpy does? I filed an issue at
        # https://github.com/pytorch/pytorch/issues/199
        try:
            U11 = torch.potrf(A_invQ_AT)
        except:
            U11 = torch.Tensor(np.linalg.cholesky(
                A_invQ_AT.cpu().numpy())).type_as(A_invQ_AT)

        # TODO: torch.trtrs is currently not implemented on the GPU
        # and we are using gesv as a workaround.
        U12 = torch.gesv(G_invQ_AT.t(), U11.t())[0]
        U_S[:neq, :neq] = U11
        U_S[:neq, neq:] = U12
        R -= torch.mm(U12.t(), U12)

    return U_Q, U_S, R
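As context for the caching above, a minimal standalone sketch of the potrf/potrs pairing this example relies on (the matrices here are made up for illustration; the pre-1.0 PyTorch API is assumed, where torch.potrf returns the upper factor by default):

import torch

Q = torch.randn(5, 5)
Q = Q.mm(Q.t()) + 5 * torch.eye(5)   # symmetric positive definite test matrix
U_Q = torch.potrf(Q)                 # one-time factorization: U_Q.t() @ U_Q == Q
b1 = torch.randn(5, 1)
b2 = torch.randn(5, 3)
x1 = torch.potrs(b1, U_Q)            # solves Q x1 = b1 using the cached factor
x2 = torch.potrs(b2, U_Q)            # solves Q x2 = b2 without re-factorizing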
Example 2
 def GP_fit_posterior(self, mjd, mag, err, P, end=1.0, jitter=1e-5):
     """
     Expects a time series sampled at instants *mjd* (t), with values *mag* (m) and associated errors *err* (s)
     
     Returns the posterior mean and factorized covariance matrix of the GP sampled at instants x
     \[
     \mu = K_{xt} (K_{tt} + \sigma^2 I + \text{diag}(s^2))^{-1} m,
     \]
     \[
     \Sigma = K_{xx} - K_{xt} (K_{tt} + \sigma^2 I)^{-1} K_{xt}^T + \sigma^2 I
     \]
     where $\sigma^2$ is the variance of the noise.
     """
     # Kernel matrices
     non_trainable_kparams = {'period': 1.0}
     reg_points = torch.unsqueeze(torch.linspace(start=0.0, end=1.0-1.0/self.n_pivots, 
                                                 steps=self.n_pivots), dim=0)
     mjd = torch.unsqueeze(mjd, dim=0)
     Ktt = self.stationary_kernel(mjd, mjd, non_trainable_kparams)
     Ktt += torch.diag(err**2) + torch.exp(self.gp_logvar_likelihood)*torch.eye(mjd.shape[1])
     Ktx = self.stationary_kernel(mjd, reg_points, non_trainable_kparams) 
     Kxx = self.stationary_kernel(reg_points, reg_points, non_trainable_kparams)
     Ltt =  torch.potrf(Ktt, upper=False)  # Cholesky lower triangular 
     # posterior mean and covariance
     tmp1 = torch.t(torch.trtrs(Ktx, Ltt, upper=False)[0])
     tmp2 = torch.trtrs(torch.unsqueeze(mag, dim=1), Ltt, upper=False)[0]
     mu = torch.t(torch.mm(tmp1, tmp2))
     S = Kxx - torch.mm(tmp1, torch.t(tmp1)) #+ torch.exp(self.gp_logvar_likelihood)*torch.eye(self.n_pivots)
     R = torch.potrf(S + jitter*torch.eye(self.n_pivots), upper=True)
     return mu, R, reg_points
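A short note on why the two trtrs calls reproduce the docstring formulas (standard Gaussian-process identities; in the code the noise terms are already folded into Ktt):
\[
L L^\top = K_{tt} + \sigma^2 I + \text{diag}(s^2), \qquad
A = L^{-1} K_{tx} \;(\texttt{tmp1}^\top), \qquad
b = L^{-1} m \;(\texttt{tmp2}),
\]
\[
\mu = A^\top b = K_{xt}\big(K_{tt} + \sigma^2 I + \text{diag}(s^2)\big)^{-1} m, \qquad
\Sigma = K_{xx} - A^\top A .
\]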
Example 3
 def predict_f(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = self.Z.size(0)
     err = self.Y - self.mean_function(self.X)
     Kuf = self.kern.K(self.Z.get(), self.X)
     jitter = Variable(torch.eye(num_inducing,
                                 out=self.Z.data.new())) * self.jitter_level
     Kuu = self.kern.K(self.Z.get()) + jitter
     Kus = self.kern.K(self.Z.get(), Xnew)
     sigma = self.likelihood.variance.get()**0.5
     L = torch.potrf(Kuu, upper=False)
     A = torch.gesv(
         Kuf, L)[0] / sigma  # could use triangular solve here and below
     B = torch.matmul(A, A.t()) + Variable(
         torch.eye(num_inducing, out=A.data.new()))
     LB = torch.potrf(B, upper=False)
     Aerr = torch.matmul(A, err)
     c = torch.gesv(Aerr, LB)[0] / sigma
     tmp1, _ = torch.gesv(Kus, L)
     tmp2, _ = torch.gesv(tmp1, LB)
     mean = torch.matmul(tmp2.t(), c)
     if full_cov:
         var = self.kern.K(Xnew) + torch.matmul(
             tmp2.t(), tmp2) - torch.matmul(tmp1.t(), tmp1)
         var = var.unsqueeze(2).expand(-1, -1, self.Y.size(1))
     else:
         var = self.kern.Kdiag(Xnew) + (tmp2**2).sum(0) - (tmp1**2).sum(0)
         var = var.unsqueeze(1).expand(-1, self.Y.size(1))
     return mean + self.mean_function(Xnew), var
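Regarding the "could use triangular solve" comments: since L and LB are lower-triangular Cholesky factors, the gesv calls could be replaced by the triangular solver of the same API era. A sketch of the substitution, not part of the original code:

A = torch.trtrs(Kuf, L, upper=False)[0] / sigma      # solves L A = Kuf, exploiting triangularity
c = torch.trtrs(Aerr, LB, upper=False)[0] / sigma    # solves LB c = Aerr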
Example 4
    def get_LL(self, train_inputs, train_outputs):
        # form the necessary kernel matrices
        Knn_diag = torch.exp(self.logsigmaf2)
        train_inputs_col = torch.unsqueeze(train_inputs.transpose(0, 1), 2)
        pseudoin_row = torch.unsqueeze(self.pseudoin.transpose(0, 1), 1)
        pseudoin_col = torch.unsqueeze(self.pseudoin.transpose(0, 1), 2)
        length_factors = (1. / (2. * torch.exp(self.logl2))).reshape(self.input_dim, 1, 1)
        Knm = self.get_K(train_inputs_col, pseudoin_row, length_factors)
        Kmn = Knm.transpose(0, 1)
        Kmm = self.get_K(pseudoin_col, pseudoin_row, length_factors)
        mKmm = torch.max(Kmm)

        L_Kmm = torch.potrf(Kmm + 1e-15*mKmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False)
        L_slash_Kmn = torch.trtrs(Kmn, L_Kmm, upper=False)[0]
        Lambda_diag = torch.zeros(train_outputs.shape[0], 1, device=device, dtype=torch.double)
        diag_values = Lambda_diag + torch.exp(self.logsigman2)

        Qmm = Kmm + Kmn.matmul(Knm/diag_values)
        mQmm = torch.max(Qmm)
        L_Qmm = torch.potrf(Qmm + 1e-15*mQmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False) # 1e-4 for boston
        L_slash_y = torch.trtrs(Kmn.matmul(train_outputs.view(-1, 1)/diag_values), L_Qmm, upper=False)[0]

        fit = ((train_outputs.view(-1, 1))**2/diag_values).sum()-(L_slash_y**2).sum()
        log_det = 2.*torch.sum(torch.log(torch.diag(L_Qmm))) -\
            2.*torch.sum(torch.log(torch.diag(L_Kmm))) +\
            torch.sum(torch.log(diag_values))

        # get log marginal likelihood
        LL = -0.5*train_outputs.shape[0]*torch.log(2.*np.pi*torch.ones(1, device=device, dtype=torch.double)) - 0.5*log_det - 0.5*fit

        return LL
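The log_det term above relies on the matrix determinant lemma; stated here for orientation, with Λ the diagonal matrix built from diag_values:
\[
\log\det\big(\Lambda + K_{nm} K_{mm}^{-1} K_{mn}\big)
= \log\det\big(K_{mm} + K_{mn}\Lambda^{-1}K_{nm}\big) - \log\det K_{mm} + \log\det \Lambda ,
\]
which is exactly \(2\sum_i\log (L_{Qmm})_{ii} - 2\sum_i\log (L_{Kmm})_{ii} + \sum_i \log \Lambda_{ii}\) once each determinant is evaluated through its Cholesky factor.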
Example 5
def factCore(V, reduce_flag=False):
    r"""Computes :math:`K` such that :math:`I_n + VKV^\top`
    is a square-root for :math:`I_n + VV^\top`

    Arguments:
        V (Tensor): a low-rank matrix of size [n x k]
    """

    try:
        if reduce_flag:
            V = reduceRank(V)

        I_k = torch.eye(V.shape[1], dtype=V.dtype, device=V.device)
        L = torch.potrf(V.t() @ V, upper=False)
        M = torch.potrf(I_k + L.t() @ L, upper=False)
        Linv = torch.inverse(L)
        K = Linv.t() @ (M - I_k) @ Linv

    except RuntimeError as err:
        if reduce_flag:
            raise
        if str(err).startswith(NOT_FULL_RANK_ERR_MSG):
            warnings.warn(
                "The factor matrix is not full-rank. Torchutil will attempt to remove unused dimensions. This might impact performance."
            )
            return factCore(V, reduce_flag=True)
        else:
            raise

    return K, V
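A brief check of the square-root claim in the docstring, using the quantities computed above, with \(LL^\top = V^\top V\) and \(MM^\top = I_k + L^\top L\):
\[
(I_n + VKV^\top)(I_n + VKV^\top)^\top
= I_n + V\big(K + K^\top + K V^\top V K^\top\big)V^\top
= I_n + V L^{-\top}\big(MM^\top - I_k\big)L^{-1}V^\top
= I_n + VV^\top ,
\]
since \(K = L^{-\top}(M - I_k)L^{-1}\) and \(MM^\top - I_k = L^\top L\).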
Example 6
 def forward(self, A, B):
     dim = A.size(0)
     logdet = torch.log(
         torch.potrf(A).diag().prod() /
         (torch.potrf(B).diag().prod() + 0.00001) + 0.00001)
     kl = torch.mm(B.inverse(), A).trace() - dim + logdet
     return 0.5 * kl
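For reference only (not a statement about this particular module's conventions), the standard closed form for the KL divergence between two zero-mean Gaussians, and how Cholesky diagonals enter the log-determinant:
\[
\mathrm{KL}\big(\mathcal{N}(0, A)\,\big\|\,\mathcal{N}(0, B)\big)
= \tfrac{1}{2}\Big(\operatorname{tr}(B^{-1}A) - d + \log\tfrac{\det B}{\det A}\Big),
\qquad
\log\det A = 2\sum_i \log\big[\mathrm{chol}(A)\big]_{ii} .
\]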
Example 7
def factor_solve_kkt(Q, D, G, A, rx, rs, rz, ry):
    nineq, nz, neq, _ = get_sizes(G, A)

    if neq > 0:
        H_ = torch.cat([
            torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
            torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)
        ], 0)
        A_ = torch.cat([
            torch.cat([G, torch.eye(nineq).type_as(Q)], 1),
            torch.cat([A, torch.zeros(neq, nineq).type_as(Q)], 1)
        ], 0)
        g_ = torch.cat([rx, rs], 0)
        h_ = torch.cat([rz, ry], 0)
    else:
        H_ = torch.cat([
            torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
            torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)
        ], 0)
        A_ = torch.cat([G, torch.eye(nineq).type_as(Q)], 1)
        g_ = torch.cat([rx, rs], 0)
        h_ = rz

    U_H_ = torch.potrf(H_)

    invH_A_ = torch.potrs(A_.t(), U_H_)
    invH_g_ = torch.potrs(g_.view(-1, 1), U_H_).view(-1)

    S_ = torch.mm(A_, invH_A_)
    U_S_ = torch.potrf(S_)
    t_ = torch.mv(A_, invH_g_).view(-1, 1) - h_
    w_ = -torch.potrs(t_, U_S_).view(-1)
    v_ = torch.potrs(-g_.view(-1, 1) - torch.mv(A_.t(), w_), U_H_).view(-1)

    return v_[:nz], v_[nz:], w_[:nineq], w_[nineq:] if neq > 0 else None
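For orientation, the linear system the two potrs solves implement, written with the code's names (block elimination through the Schur complement \(S = A H^{-1} A^\top\)):
\[
\begin{bmatrix} H & A^\top \\ A & 0 \end{bmatrix}
\begin{bmatrix} v \\ w \end{bmatrix}
=
\begin{bmatrix} -g \\ -h \end{bmatrix},
\qquad
w = -S^{-1}\big(A H^{-1} g - h\big),
\qquad
v = H^{-1}\big(-g - A^\top w\big).
\]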
Example 8
	def forward(ctx, matrix):
		ctx.save_for_backward(matrix)

		try:
			chol_from_upper = torch.potrf(matrix, True)
			chol_from_lower = torch.potrf(matrix, False)
			return (torch.sum(torch.log(torch.diag(chol_from_upper)), 0, keepdim=True) + torch.sum(torch.log(torch.diag(chol_from_lower)), 0, keepdim=True)).view(1, 1)
		except RuntimeError:
			eigvals = torch.symeig(matrix)[0]
			return torch.sum(torch.log(eigvals[eigvals > 0]), 0, keepdim=True)
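A note on the forward pass: for a positive definite matrix the upper and lower Cholesky factors each carry half of the log-determinant, so summing the log-diagonals of both gives the full value:
\[
\log\det M = 2\sum_i \log U_{ii} = 2\sum_i \log L_{ii}
= \sum_i \log U_{ii} + \sum_i \log L_{ii} .
\]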
Example 9
    def Fv(self):
        # All the necessary arguments are instance variables, so no need to pass them
        no_train = self.Xn.shape[0]
        no_inducing = self.Xm.shape[0]

        # Calculate kernel matrices
        Kmm = self.get_K(self.Xm, self.Xm)
        Knm = self.get_K(self.Xn, self.Xm)
        Kmn = Knm.transpose(0, 1)

        # calculate the 'inner matrix' and Cholesky decompose
        M = Kmm + torch.exp(-self.logsigman2) * Kmn @ Knm
        L = torch.potrf(M + torch.mean(torch.diag(M)) * self.jitter_factor *
                        torch.eye(no_inducing).type(torch.double),
                        upper=False)

        # Compute first term (log of Gaussian pdf)
        # constant term
        constant_term = -(no_train / 2) * torch.log(torch.Tensor(
            [2 * np.pi])).type(torch.double)

        # quadratic term - Yn should be a column vector
        LslashKmny = torch.trtrs(Kmn @ self.Yn, L, upper=False)[0]
        quadratic_term = -0.5 * (
            torch.exp(-self.logsigman2) * self.Yn.transpose(0, 1) @ self.Yn -
            torch.exp(-2 * self.logsigman2) *
            LslashKmny.transpose(0, 1) @ LslashKmny)

        # logdet term
        # Cholesky decompose the Kmm
        L_inducing = torch.potrf(
            Kmm + torch.mean(torch.diag(Kmm)) * self.jitter_factor *
            torch.eye(no_inducing).type(torch.double),
            upper=False)
        logdet_term = -0.5 * (2 * torch.sum(torch.log(torch.diag(L))) -
                              2 * torch.sum(torch.log(torch.diag(L_inducing)))
                              + no_train * self.logsigman2)

        #import pdb; pdb.set_trace()

        log_gaussian_term = constant_term + logdet_term + quadratic_term

        # Compute the second term (trace regulariser)
        B = torch.trtrs(Kmn, L_inducing, upper=False)[0]
        trace_term = -0.5 * torch.exp(-self.logsigman2) * (
            no_train * torch.exp(self.logsigmaf2) - torch.sum(B**2))

        return log_gaussian_term + trace_term
Example 10
    def generate_momentum(self, q):
        dV = self.linkedV.getdV_tensor(q)
        msoftabsalpha = self.metric.msoftabsalpha
        gg = torch.dot(dV, dV)
        agg = msoftabsalpha * gg
        #print(gg)
        #print(agg)
        dV = dV * math.sqrt((numpy.cosh(agg) - 1) / gg)
        mH = torch.zeros(len(dV), len(dV))
        for i in range(len(dV)):
            v = dV[i]
            L = 1.
            r = math.sqrt(L * L + v * v)
            c = L / r
            s = v / r

            mH[i, i] = r
            for j in range(len(dV)):
                vprime = dV[j]
                Lprime = mH[i, j]

                dV[j] = c * vprime - s * Lprime
                mH[i, j] = s * vprime + c * Lprime

        mH = mH * math.sqrt(gg / numpy.sinh(agg))
        #print(mH)
        mHL = torch.potrf(mH, upper=False)
        out = point(None, self)
        out.flattened_tensor.copy_(torch.mv(mHL, torch.randn(len(dV))))
        out.load_flatten()
        return (out)
Example 11
 def fit(self, Y, K_dd, eps=1e-6):
     self.L = torch.potrf(K_dd + eps * torch.eye(K_dd.shape[0]),
                          upper=False)
     self.alpha = torch.trtrs(torch.trtrs(Y, self.L, upper=False)[0],
                              self.L.t(),
                              upper=True)[0]
     return self
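The nested trtrs calls are the usual two-stage triangular solve; in the code's names:
\[
L L^\top = K_{dd} + \epsilon I, \qquad
\alpha = L^{-\top}\big(L^{-1} Y\big) = \big(K_{dd} + \epsilon I\big)^{-1} Y .
\]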
Example 12
 def one_expected_improvement(self):
     assert self.my_param.shape[1] == self.x_train.shape[1]
     assert self.my_param.shape[0] == 1
     #import ipdb; ipdb.set_trace()
     f_max = self.y_train.max()
     out_covar = self.covar_pred()
     #import ipdb; ipdb.set_trace()
     out_mean = self.mean_pred()
     L_x = torch.potrf(out_covar, upper=False)
     if self.sampling_type == 'MC':
         Z = torch.normal(
             torch.ones(self.my_param.shape[0], self.sample_size))
     elif self.sampling_type == 'RQMC':
         z_normals = sobol_sequence(self.sample_size,
                                    self.my_param.shape[0],
                                    iSEED=np.random.randint(10**5),
                                    TRANSFORM=1).transpose()
         #import ipdb; ipdb.set_trace()
         Z = torch.tensor(z_normals,
                          dtype=torch.float32,
                          requires_grad=False)
     else:
         raise ValueError('sampling type does not exist')
     min_value, __ = torch.min(out_mean + L_x.mm(Z), dim=0)
     inner_term = torch.max((f_max - min_value),
                            torch.zeros(self.sample_size))
     #import ipdb; ipdb.set_trace()
     return inner_term.mean()
Example 13
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = torch.potrf(K, upper=False)
    alpha, _ = torch.gesv(q_mu, L)
    KL = 0.5 * (alpha**2).sum()  # Mahalanobis term.
    num_latent = q_sqrt.size(2)
    KL += num_latent * torch.diag(L).log().sum()  # Prior log-det term.
    KL += -0.5 * numpy.prod(q_sqrt.size()[1:])  # constant term
    Lq = batch_tril(q_sqrt.permute(2, 0, 1))  # force lower triangle
    KL += batch_diag(Lq).log().sum()  # logdet
    LiLq = torch.stack([torch.gesv(Lq_i, L)[0] for Lq_i in Lq])  # solve L X = Lq_i for each latent (same LHS)
    KL += 0.5 * (LiLq**2).sum()  # Trace term
    return KL
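For orientation only, the closed form that a whitened KL computation of this kind targets, stated per independent distribution with covariance \(\Sigma_q = L_q L_q^\top\) (this is the textbook expression, not a line-by-line description of the code above):
\[
\mathrm{KL}\big(\mathcal{N}(\mu_q, \Sigma_q)\,\big\|\,\mathcal{N}(0, K)\big)
= \tfrac{1}{2}\Big(\mu_q^\top K^{-1}\mu_q + \operatorname{tr}\!\big(K^{-1}\Sigma_q\big) - d
+ \log\det K - \log\det \Sigma_q\Big).
\]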
Example 14
def mvnquad(f, means, covs, H, Din, Dout=()):
    """
    Computes N Gaussian expectation integrals of a single function 'f'
    using Gauss-Hermite quadrature.

    Args:
        f: integrand function. Takes one input of shape ?xD.
        means: NxD
        covs: NxDxD
        H: Number of Gauss-Hermite evaluation points.
        Din: Number of input dimensions. Needs to be known at call-time.
        Dout: Number of output dimensions. Defaults to (). Dout is assumed
            to leave out the item index, i.e. f actually maps (?xD)->(?x*Dout).

    Returns:
        quadratures (N,*Dout)
    """
    xn, wn = mvhermgauss(H, Din)
    N = means.size(0)

    # Transform points based on Gaussian parameters
    Xt = []
    for c in covs:
        chol_cov = torch.potrf(c, upper=False) # DxD each
        Xt.append(torch.matmul(chol_cov, xn.t()))
    Xt = torch.stack(Xt, dim=0) # NxDx(H**D)
    X = 2.0 ** 0.5 * Xt + means.unsqueeze(2)  # NxDx(H**D)
    Xr = X.permute(2, 0, 1).view(-1, Din)  # (H**D*N)xD

    # Perform quadrature
    fX = f(Xr).view(*((H ** Din, N,) + Dout))
    wr = (wn * float(np.pi) ** (-Din * 0.5)).view(*((-1,) + (1,) * (1 + len(Dout))))
    return (fX * wr).sum(0)
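The transformation step follows the standard Gauss-Hermite change of variables; with \(\Sigma = LL^\top\) per item,
\[
\int f(x)\,\mathcal{N}(x \mid m, \Sigma)\,dx
= \pi^{-D/2}\!\int f\big(m + \sqrt{2}\,L z\big)\,e^{-z^\top z}\,dz
\approx \pi^{-D/2}\sum_i w_i\, f\big(m + \sqrt{2}\,L z_i\big),
\]
which is why the code scales the Cholesky-transformed points by \(\sqrt{2}\) and the weights by \(\pi^{-D/2}\).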
Example 15
 def _set_pars(self, jitter):
     Ky = self.kernel(self.X, self.X)
     inds = list(range(len(Ky)))
     Ky[[inds], [inds]] += self.sn + jitter
     self.L = torch.potrf(Ky, upper=False)
     self.alpha = torch.trtrs(self.y, self.L, upper=False)[0]
     self.alpha = torch.trtrs(self.alpha, self.L.t(), upper=True)[0]
Example 16
    def loss(self, batch_size):
        mu, marginals, samples = self.forward(batch_size)

        ico = []
        det = []
        for i in range(self.num_samples):
            m = marginals[i, :, :]
            ico.append(torch.inverse(m).unsqueeze(0))
            det.append(torch.potrf(m).diag().prod()**2)

        ico = torch.cat(ico, 0).repeat(batch_size, 1, 1)
        det = torch.cat(det).unsqueeze(0)
        y = (samples - mu).view(-1, self.z_dim, 1)

        a = torch.matmul(ico, y)
        z = torch.matmul(torch.transpose(y, 1, 2), a)
        z = z.view(-1, self.num_samples)

        logq = -0.5 * z - 0.5 * self.z_dim * np.log(
            2 * np.pi) - 0.5 * torch.log(det)
        logp = self.p.logprob(samples)

        loss = log_mean_exp(logp - logq)
        loss = -torch.mean(loss)
        return loss
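One possible numerical refinement, offered only as a sketch: taking the determinant as the squared product of Cholesky diagonals and logging it afterwards can overflow or underflow for larger matrices; accumulating the log-determinant directly from the diagonal avoids that.

# hypothetical alternative to `torch.potrf(m).diag().prod()**2` followed by torch.log:
logdet = 2.0 * torch.potrf(m).diag().log().sum()   # log det m, accumulated in log space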
Example 17
    def predict_f(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, point at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + Variable(
            torch.eye(self.X.size(0),
                      out=self.X.data.new())) * self.likelihood.variance.get()
        L = torch.potrf(K, upper=False)
        A, _ = torch.gesv(
            Kx, L
        )  # could use triangular solve, note gesv has B first, then A in AX=B
        V, _ = torch.gesv(self.Y - self.mean_function(self.X),
                          L)  # could use triangular solve
        fmean = torch.mm(A.t(), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - torch.mm(A.t(), A)
            fvar = fvar.unsqueeze(2).expand(fvar.size(0), fvar.size(1),
                                            self.Y.size(1))
        else:
            fvar = self.kern.Kdiag(Xnew) - (A**2).sum(0)
            fvar = fvar.view(-1, 1)
            fvar = fvar.expand(fvar.size(0), self.Y.size(1))
        return fmean, fvar
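For reference, the standard GP regression predictive equations that this method evaluates through the Cholesky factor L of \(K_{xx} + \sigma^2 I\):
\[
\mathbb{E}[F_*] = K_{*x}\big(K_{xx} + \sigma^2 I\big)^{-1}\big(Y - m(X)\big) + m(X_*),
\qquad
\operatorname{Cov}[F_*] = K_{**} - K_{*x}\big(K_{xx} + \sigma^2 I\big)^{-1} K_{x*},
\]
with \(A = L^{-1}K_{x*}\) and \(V = L^{-1}(Y - m(X))\), so the mean is \(A^\top V\) and the subtracted term is \(A^\top A\).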
Example 18
def generate_momentum_wrap(metric, var_vec=None, Cov=None, V=None, alpha=None):
    # Cov is the covariance of the momentum distribution, NOT the empirical sample covariance
    # the covariance for momentum = Cov^-1
    # returns tensor
    # generate from prob(p given q)
    if (metric == "unit_e"):

        def generate(q):
            return (torch.randn(len(q)))
    elif (metric == "diag_e"):
        sd = torch.sqrt(var_vec)

        #inv_sd = 1/sd
        def generate(q):
            return (torch.randn(len(q)) * sd)
    elif (metric == "dense_e"):
        #print(Cov)
        L = torch.potrf(a=Cov, upper=False)
        L_t = L.t()

        #L_inv = torch.inverse(L)
        def generate(q):
            return (torch.mv(L_t, torch.randn(len(q))))
    elif (metric == "softabs"):

        def generate(q):
            lam, Q = eigen(getH(q, V).data)
            temp = torch.mm(Q, torch.diag(torch.sqrt(softabs_map(lam, alpha))))
            out = torch.mv(temp, torch.randn(len(lam)))
            return (out)
    else:
        # should raise error here
        return ("error")
    return (generate)
Example 19
    def __init__(self, x_dim, h_dim, t_dim):
        super(VAE_BKDG, self).__init__()

        self.x_dim = x_dim
        self.h_dim = h_dim
        self.t_dim = t_dim
        d_dim = int(x_dim/t_dim)
        self.d_dim = d_dim
        l_dim = int(d_dim * (d_dim+1)/2)
        self.l_dim = l_dim
        z_dim = t_dim * l_dim
        self.z_dim = z_dim

        # feature
        self.fc0 = nn.Linear(x_dim, h_dim)
        self.fc1 = nn.Linear(h_dim, h_dim)
        # encode
        self.fc21 = nn.Linear(h_dim, z_dim)
        self.fc22 = nn.Linear(h_dim, int(t_dim*(t_dim+1)/2))
        # transform
        self.fc2 = nn.Linear(z_dim, h_dim)
        self.fc3 = nn.Linear(h_dim, h_dim)
        # decode
        self.fc41 = nn.Linear(h_dim, x_dim)
        self.fc42 = nn.Linear(h_dim, x_dim)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        t = torch.linspace(0,2,steps=t_dim+1); t = t[1:]
        self.K = Variable(torch.exp(-torch.pow(t.unsqueeze(1)-t.unsqueeze(0),2)/2/2) + 1e-4*torch.eye(t_dim))
        self.Kh = torch.potrf(self.K)
        self.iK = torch.potri(self.Kh)
Example 20
    def set_metric(self, input_var):
        # input: either flattened empirical covariance for dense_e or
        # flattened var tensor for diag_e
        if self.name == "diag_e":
            try:
                # none of the variances are negative
                assert not sum(input_var < 0) > 0
                # none of the variances are too small or too large
                assert not sum(input_var < 1e-8) > 0 and not sum(
                    input_var > 1e8) > 0
                self._flattened_var.copy_(input_var)
                #self._flattened_sd.copy_(torch.sqrt(self._flattened_var))
                self._load_flatten()
            except:
                raise ValueError("negative var or extreme var values")
        elif self.name == "dense_e":
            try:
                temp_cov_inv = torch.inverse(input_var)
                temp_cov_L = torch.potrf(input_var, upper=False)
                #self._flattened_cov.copy_(input_var)
                self._flattened_cov_L.copy_(temp_cov_L)
                self._flattened_cov_inv.copy_(temp_cov_inv)
            except:
                raise ValueError("not decomposable")

        else:
            raise ValueError(
                "should not use this function unless the metrics are diag_e or dense_e"
            )
Example 21
def pd_to_vec(A):
    """Convert a positive-definite matrix A to a vector l of entries from
    its cholesky factor. Diagonal entries are logged so they occupy the full
    real line, and still map back to positive values.
    """
    L = torch.potrf(A, upper=False)
    return trilpd_to_vec(L)
Example 22
def gauss_kl_diag(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = torch.potrf(K, upper=False)
    alpha, _ = torch.gesv(q_mu, L)
    KL = 0.5 * (alpha**2).sum()  # Mahalanobis term.
    num_latent = q_sqrt.size(1)
    KL += num_latent * torch.diag(L).log().sum()  # Prior log-det term.
    KL += -0.5 * q_sqrt.numel()  # constant term
    KL += -q_sqrt.log().sum()  # Log-det of q-cov
    K_inv, _ = torch.potrs(Variable(torch.eye(L.size(0), out=L.data.new())),
                           L,
                           upper=False)
    KL += 0.5 * (torch.diag(K_inv).unsqueeze(1) *
                 q_sqrt**2).sum()  # Trace term.
    return KL
Example 23
def chol_orthogonalize(vector_matrix):
    VV = vector_matrix @ vector_matrix.t() + 0.01 * to.eye(
        vector_matrix.shape[0])
    R = to.potrf(VV, upper=True)
    U = vector_matrix.t() @ to.inverse(R)

    return U
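Why this produces (approximately) orthonormal columns, in the code's names, with the small ridge \(0.01\,I\) accounting for the approximation:
\[
R^\top R = VV^\top + 0.01\,I, \qquad U = V^\top R^{-1}
\;\Rightarrow\;
U^\top U = R^{-\top}\big(R^\top R - 0.01\,I\big)R^{-1} = I - 0.01\,R^{-\top}R^{-1} \approx I .
\]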
Example 24
    def __init__(self, x_dim, h_dim, z_dim):
        super(VAE, self).__init__()

        self.x_dim = x_dim  # ND
        self.h_dim = h_dim
        self.z_dim = z_dim  # ND^*
        self.t_dim = np.int(x_dim / (2 * z_dim / x_dim - 1))  # N
        # feature
        self.fc0 = nn.Linear(x_dim, h_dim)
        # encode
        self.fc21 = nn.Linear(h_dim, z_dim)
        self.fc22 = nn.Linear(h_dim, np.int(self.t_dim * (self.t_dim + 1) / 2))
        #         self.fc23 = nn.Linear(h_dim, z_dim)
        # transform
        self.fc3 = nn.Linear(z_dim, h_dim)
        # decode
        self.fc41 = nn.Linear(h_dim, x_dim)
        self.fc42 = nn.Linear(h_dim, x_dim)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        # problem-specific parameters
        self.D = np.int(self.z_dim / self.x_dim * 2 - 1)
        self.N = np.int(self.x_dim / self.D)
        # GP kernel
        t = torch.linspace(0, 2, steps=self.N + 1)
        t = t[1:]
        self.K = Variable(
            torch.exp(-torch.pow(t.unsqueeze(1) - t.unsqueeze(0), 2) / 2 / 2) +
            1e-4 * torch.eye(self.N))
        self.Kh = torch.potrf(self.K)
        #         self.iK = Variable(torch.inverse(self.K.data))
        self.iK = torch.potri(self.Kh)
Example 25
    def forward(self, A):
        """Cholesky decomposition with jittering

        Add jitter to matrix A if A is not positive definite, increase the
        amount of jitter w.r.t number of tries.
        This function uses the LAPACK routine::
            torch.potrf(A, upper=False) -> Tensor
        i.e. it only performs the lower factorization, A = LL'
        """
        success = False
        max_tries = 10
        i = 0

        while i < max_tries and not success:
            i += 1
            try:
                L = torch.potrf(A, upper=False)
                success = True

            except RuntimeError as e:
                if e.args[0].startswith('Lapack Error in potrf'):
                    print('Warning: Cholesky error for the %d time' % i)
                    A += A.diag().mean(0).expand(A.size(0),).diag() * 1e-6 * \
                         pow(10, i-1)
                    # print(self.flag)
                if i == max_tries:
                    raise e

        self.save_for_backward(L)
        return L
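A minimal usage sketch of the escalating-jitter idea on a deliberately singular matrix (standalone, outside the autograd Function; the matrix is made up for illustration):

A = torch.ones(3, 3) + 1e-12 * torch.eye(3)           # nearly rank-1, not positive definite
for i in range(1, 11):
    try:
        L = torch.potrf(A, upper=False)
        break
    except RuntimeError:
        # add jitter proportional to the diagonal mean, growing with each failed attempt
        A = A + A.diag().mean() * 1e-6 * pow(10, i - 1) * torch.eye(3)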
Example 26
    def nystrom(Q, anorm):
        r"""
        Use the Nystrom method to obtain approximations to the
        eigenvalues and eigenvectors of A (shifting A on the subspace
        spanned by the columns of Q in order to make the shifted A be
        positive definite).
        """
        def svd_thin_matrix(A):
            r"""
            Efficient implementation of SVD on [N x D] matrix, D >> N.
            """
            (e, V) = torch.symeig(A @ A.t(), eigenvectors=True)

            Sigma = torch.sqrt(e)
            SigInv = 1 / Sigma
            SigInv[torch.isnan(SigInv)] = 0
            U = A.t() @ (V * SigInv)

            return U, Sigma, V

        anorm = .1e-6 * anorm * math.sqrt(1. * n)
        E = f(Q) + anorm * Q
        R = Q.t() @ E
        R = (R + R.t()) / 2
        R = torch.potrf(R, upper=False)  # Cholesky
        (tmp, _) = torch.gesv(E.t(), R)  # Solve
        V, d, _ = svd_thin_matrix(tmp)
        d = d * d - anorm
        return d, V
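The algebra behind the shift-factor-solve sequence, in the code's names (with \(\alpha\) the shift anorm and \(E = f(Q) + \alpha Q \approx (A + \alpha I)Q\)):
\[
A + \alpha I \;\approx\; E\,\big(Q^\top E\big)^{-1} E^\top
= \big(R^{-1}E^\top\big)^\top\big(R^{-1}E^\top\big),
\qquad Q^\top E = R R^\top ,
\]
so the squared singular values of \(R^{-1}E^\top\) (computed by svd_thin_matrix), minus the shift \(\alpha\), approximate the eigenvalues of A, with the corresponding right singular vectors as approximate eigenvectors.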
Example 27
    def step(self):
        for l in self.linear_layers:
            # Mini-batch updates
            # theta_(t+1) = theta_t + 2 * (gamma * I_hat + N * grad_avg(theta_t; X_t))^-1 * ( grad(log p(theta_t)) + N * grad_avg(theta_t) + eta_t)
            # with eta_t ~ N(0, 4 * B / eta_t)

            # According to Ahn et al. (2012): B \propto N*I_hat
            # Probably a scale problem here!
            # if self.t < 10:
            #     I_hat_inv = torch.eye(self.I_hat[l].size(0))
            # else:
            eps = 1e-8 * 10**-(self.t // 10)
            B = self.I_hat[l]
            mat = self.gamma * self.I_hat[l] + 4. * B / self.epsilon
            mat_inv = torch.inverse(mat.add(torch.eye(self.I_hat[l].size(0))))
            #mat_inv = torch.inverse(mat)

            # Cholesky factor of matrix B

            B_ch = torch.potrf(B.add(eps, torch.eye(self.I_hat[l].size(0))),
                               upper=True)
            #B_ch = torch.potrf(B, upper=False)

            noise = (self.noise_factor * B_ch).mm(
                torch.randn_like(self.grad_mean[l]))

            # Update in parameter space
            update = 2. * (mat_inv).mm((self.grad_mean[l]).add_(
                self.lambda_ / self.N, l.weight.data).add_(noise))
            l.weight.data.add_(-update)
            #print(update)
        self.t += 1
Example 28
def test_interpolated_toeplitz_gp_marginal_log_likelihood_forward():
    x = Variable(torch.linspace(0, 1, 5))
    y = torch.randn(5)
    noise = torch.Tensor([1e-4])
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(10, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))
    c = covar_x.c.data
    T = utils.toeplitz.sym_toeplitz(c)

    W_left = index_coef_to_sparse(covar_x.J_left, covar_x.C_left, len(c))
    W_right = index_coef_to_sparse(covar_x.J_right, covar_x.C_right, len(c))

    W_left_dense = W_left.to_dense()
    W_right_dense = W_right.to_dense()

    WTW = W_left_dense.matmul(T.matmul(W_right_dense.t())) + torch.eye(len(x)) * 1e-4

    quad_form_actual = y.dot(WTW.inverse().matmul(y))
    chol_T = torch.potrf(WTW)
    log_det_actual = chol_T.diag().log().sum() * 2

    actual = -0.5 * (log_det_actual + quad_form_actual + math.log(2 * math.pi) * len(y))

    res = InterpolatedToeplitzGPMarginalLogLikelihood(W_left, W_right, num_samples=1000)(Variable(c),
                                                                                         Variable(y),
                                                                                         Variable(noise)).data
    assert all(torch.abs((res - actual) / actual) < 0.05)
Example 29
def generate_momentum_wrap(metric, var_vec=None, Cov=None, V=None, alpha=None):
    # returns tensor
    if (metric == "unit_e"):

        def generate(q):
            return (torch.randn(len(q)))
    elif (metric == "diag_e"):
        sd = torch.sqrt(var_vec)
        inv_sd = 1 / sd

        def generate(q):
            return (torch.randn(len(q)) * inv_sd)
    elif (metric == "dense_e"):
        L = torch.potrf(Cov, upper=False)
        L_inv = torch.inverse(L)

        def generate(q):
            return (torch.mv(L_inv, torch.randn(len(q))))
    elif (metric == "softabs"):

        def generate(q):
            lam, Q = eigen(getH(q, V).data)
            temp = torch.mm(Q, torch.diag(torch.sqrt(softabs_map(lam, alpha))))
            out = torch.mv(temp, torch.randn(len(lam)))
            return (out)
    else:
        # should raise error here
        return ("error")
    return generate
Example 30
 def train_locator_model(self, model_XTX, model_XTY, model=None):
     if model is None:
         model = torch.potrs(model_XTY, torch.potrf(model_XTX))
     else:
         for _ in range(30):
             model, _ = torch.trtrs(model_XTY - torch.mm(torch.triu(model_XTX, diagonal=1), model), torch.tril(model_XTX, diagonal=0), upper=False)
     return model
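For orientation, what the two branches compute (names as in the code): the direct branch solves the normal equations through a Cholesky factorization, while the loop performs Gauss-Seidel-style sweeps with the lower-triangular part of model_XTX as the preconditioner:
\[
\texttt{model} = \big(X^\top X\big)^{-1} X^\top Y,
\qquad
\texttt{model}_{k+1} = L^{-1}\big(X^\top Y - U\,\texttt{model}_k\big),
\quad X^\top X = L + U ,
\]
where L is the lower triangle including the diagonal (torch.tril(..., diagonal=0)) and U the strict upper triangle (torch.triu(..., diagonal=1)).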
Example 31
 def forward(ctx, a, upper=True):
     ctx.upper = upper
     fact = torch.potrf(a, upper)
     ctx.save_for_backward(fact)
     return fact