Example #1
def analytical_q_cholesky(data, decoder_weights, decoder_bias, beta=1):
    '''
    Compute the mean and covariance of the analytical posterior q_beta,
    using a Cholesky decomposition of W W^T + (1/beta) I.
    '''

    W = decoder_weights
    b = decoder_bias

    WT = torch.t(W)
    I_x = torch.eye(data.size()[-1])
    I_z = torch.eye(W.size()[-1])
    data = torch.t(data)

    b = torch.unsqueeze(b, dim=1)
    subcore = torch.matmul(W, WT) + (1. / beta) * I_x

    L = torch.cholesky(subcore, upper=False)
    LT_XT = torch.trtrs(W, L, upper=False)[0]
    X_T = torch.trtrs(LT_XT, torch.t(L), upper=True)[0]

    core = torch.t(X_T)

    mu = torch.matmul(core, (data - b))

    cov = I_z - torch.matmul(core, W)

    return mu, cov
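A minimal usage sketch for the function above (not part of the original source): the shapes, the random data, and beta=2.0 are illustrative assumptions, and torch.cholesky / torch.trtrs are assumed available as in PyTorch <= 1.1.

import torch

torch.manual_seed(0)
W = torch.randn(5, 2)       # decoder weights mapping a 2-D latent code to 5-D observations
b = torch.randn(5)          # decoder bias
data = torch.randn(3, 5)    # three observations, one per row

mu, cov = analytical_q_cholesky(data, W, b, beta=2.0)
print(mu.shape)             # torch.Size([2, 3]): one posterior mean per observation (as columns)
print(cov.shape)            # torch.Size([2, 2]): posterior covariance shared by all observations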
Example #2
 def GP_fit_posterior(self, mjd, mag, err, P, end=1.0, jitter=1e-5):
     """
     Expect a time series sampled at *mjd* instants (t) with values *mag* (m) and associated errors *err* (s)
     
     Returns the posterior mean and factorized covariance matrix of the GP sampled at instants x
     \[
     \mu = K_{xt} (K_{tt} + \sigma^2 I + \text{diag}(s^2))^{-1} m,
     \]
     \[
     \Sigma = K_{xx} - K_{xt} (K_{tt} + \sigma^2 I)^{-1} K_{xt}^T + \sigma^2 I
     \]
     where $\sigma^2$ is the variance of the noise.
     """
     # Kernel matrices
     non_trainable_kparams = {'period': 1.0}
     reg_points = torch.unsqueeze(torch.linspace(start=0.0, end=1.0-1.0/self.n_pivots, 
                                                 steps=self.n_pivots), dim=0)
     mjd = torch.unsqueeze(mjd, dim=0)
     Ktt = self.stationary_kernel(mjd, mjd, non_trainable_kparams)
     Ktt += torch.diag(err**2) + torch.exp(self.gp_logvar_likelihood)*torch.eye(mjd.shape[1])
     Ktx = self.stationary_kernel(mjd, reg_points, non_trainable_kparams) 
     Kxx = self.stationary_kernel(reg_points, reg_points, non_trainable_kparams)
     Ltt = torch.potrf(Ktt, upper=False)  # lower-triangular Cholesky factor
     # posterior mean and covariance
     tmp1 = torch.t(torch.trtrs(Ktx, Ltt, upper=False)[0])
     tmp2 = torch.trtrs(torch.unsqueeze(mag, dim=1), Ltt, upper=False)[0]
     mu = torch.t(torch.mm(tmp1, tmp2))
     S = Kxx - torch.mm(tmp1, torch.t(tmp1)) #+ torch.exp(self.gp_logvar_likelihood)*torch.eye(self.n_pivots)
     R = torch.potrf(S + jitter*torch.eye(self.n_pivots), upper=True)
     return mu, R, reg_points
Example #3
def update_precond_kron(Ql, Qr, dX, dG, step=0.01):
    """
    update Kronecker product preconditioner P = kron_prod(Qr^T*Qr, Ql^T*Ql)
    Ql: (left side) Cholesky factor of preconditioner with positive diagonal entries
    Qr: (right side) Cholesky factor of preconditioner with positive diagonal entries
    dX: perturbation of (matrix) parameter
    dG: perturbation of (matrix) gradient
    step: normalized step size in range [0, 1] 
    """
    max_l = torch.max(torch.abs(Ql))
    max_r = torch.max(torch.abs(Qr))

    rho = torch.sqrt(max_l / max_r)
    Ql = Ql / rho
    Qr = rho * Qr

    A = Ql.mm(dG.mm(Qr.t()))
    Bt = torch.trtrs((torch.trtrs(dX.t(), Qr.t(), upper=False))[0].t(),
                     Ql.t(),
                     upper=False)[0]

    grad1 = torch.triu(A.mm(A.t()) - Bt.mm(Bt.t()))
    grad2 = torch.triu(A.t().mm(A) - Bt.t().mm(Bt))

    step1 = step / (torch.max(torch.abs(grad1)) + _tiny)
    step2 = step / (torch.max(torch.abs(grad2)) + _tiny)

    return Ql - step1 * grad1.mm(Ql), Qr - step2 * grad2.mm(Qr)
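A small usage sketch (illustrative only): `_tiny` is assumed to be a module-level constant guarding against division by zero, and the triangular factors and perturbations are random placeholders.

import torch

_tiny = 1.2e-38             # assumed module-level constant; the real value lives in the original module

torch.manual_seed(0)
m, n = 4, 3                                                  # the parameter is an m x n matrix
Ql = torch.eye(m) + 0.01 * torch.triu(torch.randn(m, m))     # upper-triangular factors with positive diagonals
Qr = torch.eye(n) + 0.01 * torch.triu(torch.randn(n, n))
dX = torch.randn(m, n)                                       # parameter perturbation
dG = torch.randn(m, n)                                       # matching gradient perturbation

Ql, Qr = update_precond_kron(Ql, Qr, dX, dG, step=0.01)
# Per the docstring, the preconditioned gradient is then Ql.t() @ Ql @ G @ Qr.t() @ Qr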
Example #4
 def _ssor_preconditioner(self, A, v):
     DL = A.tril()
     D = A.diag()
     upper_part = (1 / D).expand_as(DL).mul(DL.t())
     Minv_times_v = torch.trtrs(
         torch.trtrs(v, DL, upper=False)[0], upper_part)[0].squeeze()
     return Minv_times_v
Example #5
    def backward(self, grad_output):
        """ Giles, 2008, An extended collection of matrix derivative results
        for forward and reverse mode algorithmic differentiation), sec 2.3.1.

        Args:
            grad_output(sequence of (Tensor, Variable or None)): Gradients
                of the objective function w.r.t. each element of matrix X
                (output of :func:`forward`)

        Returns:
             Tensor: gradient w.r.t. A (triangular matrix)
        """
        grad_A = grad_B = None
        A, X = self.saved_tensors

        if self.needs_input_grad[0]:
            grad_A = -torch.trtrs(grad_output,
                                  A,
                                  self.upper,
                                  transpose=True,
                                  unitriangular=False)[0].mm(X.t())
            if self.upper:
                grad_A = torch.triu(grad_A)
            else:
                grad_A = torch.tril(grad_A)

        if self.needs_input_grad[1]:
            grad_B = torch.trtrs(grad_output,
                                 A,
                                 self.upper,
                                 transpose=True,
                                 unitriangular=False)[0]

        return grad_A, grad_B
Example #6
 def _set_pars(self, jitter):
     Ky = self.kernel(self.X, self.X)
     inds = list(range(len(Ky)))
     Ky[[inds], [inds]] += self.sn + jitter
     self.L = torch.potrf(Ky, upper=False)
     self.alpha = torch.trtrs(self.y, self.L, upper=False)[0]
     self.alpha = torch.trtrs(self.alpha, self.L.t(), upper=True)[0]
Example #7
 def fit(self, Y, K_dd, eps=1e-6):
     self.L = torch.potrf(K_dd + eps * torch.eye(K_dd.shape[0]),
                          upper=False)
     self.alpha = torch.trtrs(torch.trtrs(Y, self.L, upper=False)[0],
                              self.L.t(),
                              upper=True)[0]
     return self
Example #8
def _kl_lowrankmultivariatenormal_multivariatenormal(p, q):
    if p.event_shape != q.event_shape:
        raise ValueError(
            "KL-divergence between two (Low Rank) Multivariate Normals with\
                          different event shapes cannot be computed")

    term1 = (
        2 *
        q._unbroadcasted_scale_tril.diagonal(dim1=-2, dim2=-1).log().sum(-1) -
        _batch_lowrank_logdet(p._unbroadcasted_cov_factor,
                              p._unbroadcasted_cov_diag, p._capacitance_tril))
    term3 = _batch_mahalanobis(q._unbroadcasted_scale_tril, (q.loc - p.loc))
    # Expands term2 according to
    # inv(qcov) @ pcov = inv(q_tril @ q_tril.T) @ (pW @ pW.T + pD)
    combined_batch_shape = torch._C._infer_size(
        q._unbroadcasted_scale_tril.shape[:-2],
        p._unbroadcasted_cov_factor.shape[:-2])
    n = p.event_shape[0]
    q_scale_tril = q._unbroadcasted_scale_tril.expand(combined_batch_shape +
                                                      (n, n))
    p_cov_factor = p._unbroadcasted_cov_factor.expand(combined_batch_shape +
                                                      (n,
                                                       p.cov_factor.size(-1)))
    p_cov_diag = (torch.diag_embed(
        p._unbroadcasted_cov_diag.sqrt()).expand(combined_batch_shape +
                                                 (n, n)))
    term21 = _batch_trace_XXT(
        torch.trtrs(p_cov_factor, q_scale_tril, upper=False)[0])
    term22 = _batch_trace_XXT(
        torch.trtrs(p_cov_diag, q_scale_tril, upper=False)[0])
    term2 = term21 + term22
    return 0.5 * (term1 + term2 + term3 - p.event_shape[0])
Example #9
    def get_LL(self, train_inputs, train_outputs):
        # form the necessary kernel matrices
        Knn_diag = torch.exp(self.logsigmaf2)
        train_inputs_col = torch.unsqueeze(train_inputs.transpose(0, 1), 2)
        pseudoin_row = torch.unsqueeze(self.pseudoin.transpose(0, 1), 1)
        pseudoin_col = torch.unsqueeze(self.pseudoin.transpose(0, 1), 2)
        length_factors = (1. / (2. * torch.exp(self.logl2))).reshape(self.input_dim, 1, 1)
        Knm = self.get_K(train_inputs_col, pseudoin_row, length_factors)
        Kmn = Knm.transpose(0, 1)
        Kmm = self.get_K(pseudoin_col, pseudoin_row, length_factors)
        mKmm = torch.max(Kmm)

        L_Kmm = torch.potrf(Kmm + 1e-15*mKmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False)
        L_slash_Kmn = torch.trtrs(Kmn, L_Kmm, upper=False)[0]
        Lambda_diag = torch.zeros(train_outputs.shape[0], 1, device=device, dtype=torch.double)
        diag_values = Lambda_diag + torch.exp(self.logsigman2)

        Qmm = Kmm + Kmn.matmul(Knm/diag_values)
        mQmm = torch.max(Qmm)
        L_Qmm = torch.potrf(Qmm + 1e-15*mQmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False) # 1e-4 for boston
        L_slash_y = torch.trtrs(Kmn.matmul(train_outputs.view(-1, 1)/diag_values), L_Qmm, upper=False)[0]

        fit = ((train_outputs.view(-1, 1))**2/diag_values).sum()-(L_slash_y**2).sum()
        log_det = 2.*torch.sum(torch.log(torch.diag(L_Qmm))) -\
            2.*torch.sum(torch.log(torch.diag(L_Kmm))) +\
            torch.sum(torch.log(diag_values))

        # get log marginal likelihood
        LL = -0.5*train_outputs.shape[0]*torch.log(2.*np.pi*torch.ones(1, device=device, dtype=torch.double)) - 0.5*log_det - 0.5*fit

        return LL
Example #10
    def _ssor_preconditioner(self, lhs_mat, mat):
        if lhs_mat.ndimension() == 2:
            DL = lhs_mat.tril()
            D = lhs_mat.diag()
            upper_part = (1 / D).expand_as(DL).mul(DL.t())
            Minv_times_mat = torch.trtrs(
                torch.trtrs(mat, DL, upper=False)[0], upper_part)[0]

        elif lhs_mat.ndimension() == 3:
            if mat.size(0) == 1 and lhs_mat.size(0) != 1:
                mat = mat.expand(*([lhs_mat.size(0)] + list(mat.size())[1:]))
            Minv_times_mat = mat.new(*mat.size())
            for i in range(lhs_mat.size(0)):
                DL = lhs_mat[i].tril()
                D = lhs_mat[i].diag()
                upper_part = (1 / D).expand_as(DL).mul(DL.t())
                Minv_times_mat[i].copy_(
                    torch.trtrs(
                        torch.trtrs(mat[i], DL, upper=False)[0],
                        upper_part)[0])

        else:
            raise RuntimeError('Invalid number of dimensions')

        return Minv_times_mat
Example #11
 def weight_inverse(self):
     """Cost:
         inverse = O(D^3)
     where:
         D = num of features
     """
     lower, upper = self._create_lower_upper()
     identity = torch.eye(self.features, self.features)
     lower_inverse, _ = torch.trtrs(identity, lower, upper=False, unitriangular=True)
     weight_inverse, _ = torch.trtrs(lower_inverse, upper, upper=True, unitriangular=False)
     return weight_inverse
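The method above relies on self._create_lower_upper(); a self-contained sketch of the same two-solve inversion pattern, with hypothetical hand-built LU-style factors, could look like this.

import torch

torch.manual_seed(0)
D = 4
lower = torch.tril(torch.randn(D, D), diagonal=-1) + torch.eye(D)       # unit lower-triangular factor
upper = torch.triu(torch.randn(D, D), diagonal=1) + 3.0 * torch.eye(D)  # upper-triangular factor
W = lower @ upper                                                        # weight matrix W = L U

identity = torch.eye(D, D)
lower_inverse, _ = torch.trtrs(identity, lower, upper=False, unitriangular=True)        # L^{-1}
weight_inverse, _ = torch.trtrs(lower_inverse, upper, upper=True, unitriangular=False)  # U^{-1} L^{-1} = W^{-1}
print(torch.max(torch.abs(weight_inverse @ W - identity)))              # should be close to zero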
Example #12
 def solve_linear_system(self, A, b, K, delta=0.0):
     I = torch.eye(self.N, self.N, device=self.device)
     A_t_K = torch.mm(A.t(), K)
     A_t_A = torch.mm(A_t_K, A)
     LAM = A_t_A + delta * I
     R = torch.mm(A_t_K, b)
     #Solve using cholesky
     l = torch.cholesky(LAM, upper=False)
     z = torch.trtrs(R, l, transpose=False, upper=False)[0]
     dtheta = torch.trtrs(z, l, transpose=True, upper=False)[0]
     return dtheta.view(self.num_traj_states, self.state_dim)
Example #13
 def backward(self, grad_output):
     """
     Reference:
         eqn (10) & (9) in Iain Murray, 2016, arXiv:1602.07527
     """
     L, = self.saved_tensors
     P = torch.tril(torch.mm(L.t(), grad_output))
     P -= P.diag().diag() / 2.
     S = torch.trtrs(torch.trtrs(P + P.t(), L.t(), upper=True)[0].t(),
                     L.t(),
                     upper=True)[0]
     return S / 2.
Example #14
    def Fv(
        self
    ):  # All the necessary arguments are instance variables, so no need to pass them
        no_train = self.Xn.shape[0]
        no_inducing = self.Xm.shape[0]

        # Calculate kernel matrices
        Kmm = self.get_K(self.Xm, self.Xm)
        Knm = self.get_K(self.Xn, self.Xm)
        Kmn = Knm.transpose(0, 1)

        # calculate the 'inner matrix' and Cholesky decompose
        M = Kmm + torch.exp(-self.logsigman2) * Kmn @ Knm
        L = torch.potrf(M + torch.mean(torch.diag(M)) * self.jitter_factor *
                        torch.eye(no_inducing).type(torch.double),
                        upper=False)

        # Compute first term (log of Gaussian pdf)
        # constant term
        constant_term = -(no_train / 2) * torch.log(torch.Tensor(
            [2 * np.pi])).type(torch.double)

        # quadratic term - Yn should be a column vector
        LslashKmny = torch.trtrs(Kmn @ self.Yn, L, upper=False)[0]
        quadratic_term = -0.5 * (
            torch.exp(-self.logsigman2) * self.Yn.transpose(0, 1) @ self.Yn -
            torch.exp(-2 * self.logsigman2) *
            LslashKmny.transpose(0, 1) @ LslashKmny)

        # logdet term
        # Cholesky decompose the Kmm
        L_inducing = torch.potrf(
            Kmm + torch.mean(torch.diag(Kmm)) * self.jitter_factor *
            torch.eye(no_inducing).type(torch.double),
            upper=False)
        logdet_term = -0.5 * (2 * torch.sum(torch.log(torch.diag(L))) -
                              2 * torch.sum(torch.log(torch.diag(L_inducing)))
                              + no_train * self.logsigman2)

        #import pdb; pdb.set_trace()

        log_gaussian_term = constant_term + logdet_term + quadratic_term

        # Compute the second term (trace regulariser)
        B = torch.trtrs(Kmn, L_inducing, upper=False)[0]
        trace_term = -0.5 * torch.exp(-self.logsigman2) * (
            no_train * torch.exp(self.logsigmaf2) - torch.sum(B**2))

        return log_gaussian_term + trace_term
Example #15
def cho_solve_AXB(a, cho_C, b):
    """Compute tensor $a C^{-1} b$ from cholesky factor.

    ----
    Parameters:
        a: (M x N) tensor
        cho_C: (N x N) lower triangular tensor where cho_C cho_C^T = C
        b: (N x L) tensor
    ----
    Outputs:
        a C^{-1} b
    """
    left, _ = torch.trtrs(a.t(), cho_C, upper=False)
    right, _ = torch.trtrs(b, cho_C, upper=False)

    return torch.mm(left.t(), right)
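A quick sanity check for cho_solve_AXB (illustrative, not from the original project): a random SPD matrix C is factorized and the result is compared against an explicit inverse.

import torch

torch.manual_seed(0)
M, N, L = 2, 5, 3
a = torch.randn(M, N)
b = torch.randn(N, L)
C = torch.randn(N, N)
C = C @ C.t() + N * torch.eye(N)          # random symmetric positive definite matrix
cho_C = torch.cholesky(C, upper=False)    # lower-triangular factor with cho_C @ cho_C.t() == C

out = cho_solve_AXB(a, cho_C, b)          # a @ C^{-1} @ b without forming C^{-1}
ref = a @ torch.inverse(C) @ b
print(torch.max(torch.abs(out - ref)))    # should be close to zero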
Example #16
    def linearised_laplace_direct_cholesky(self,
                                           L,
                                           test_inputs,
                                           optimizer=None):
        # do a numerically stable version of the algorithm
        if self.learned_noise_var == True:
            noise_variance = self.get_noise_var(self.noise_var_param)
        else:
            noise_variance = self.noise_variance

        # get list of test gradients
        no_test = test_inputs.size()[0]
        G = torch.cuda.DoubleTensor(self.no_params, no_test).fill_(0)
        for i in range(no_test):
            # clear gradients
            optimizer.zero_grad()
            # get gradient of output wrt single test input
            x = test_inputs[i]
            x = torch.unsqueeze(
                x, 0)  # this may not be necessary if x is multidimensional
            gradient = self.get_gradient(x)
            # store in G
            G[:, i] = gradient

        # backsolve for all columns
        LslashG = torch.trtrs(G, L, upper=False)[0]

        # batch dot product
        predictive_var = noise_variance + torch.sum(LslashG**2, 0)
        return predictive_var.detach()
Example #17
def _kl_multivariatenormal_lowrankmultivariatenormal(p, q):
    if p.event_shape != q.event_shape:
        raise ValueError(
            "KL-divergence between two (Low Rank) Multivariate Normals with\
                          different event shapes cannot be computed")

    term1 = (
        _batch_lowrank_logdet(q._unbroadcasted_cov_factor,
                              q._unbroadcasted_cov_diag, q._capacitance_tril) -
        2 *
        p._unbroadcasted_scale_tril.diagonal(dim1=-2, dim2=-1).log().sum(-1))
    term3 = _batch_lowrank_mahalanobis(q._unbroadcasted_cov_factor,
                                       q._unbroadcasted_cov_diag,
                                       q.loc - p.loc, q._capacitance_tril)
    # Expands term2 according to
    # inv(qcov) @ pcov = [inv(qD) - inv(qD) @ qW @ inv(qC) @ qW.T @ inv(qD)] @ p_tril @ p_tril.T
    #                  = [inv(qD) - A.T @ A] @ p_tril @ p_tril.T
    qWt_qDinv = (q._unbroadcasted_cov_factor.transpose(-1, -2) /
                 q._unbroadcasted_cov_diag.unsqueeze(-2))
    A = torch.trtrs(qWt_qDinv, q._capacitance_tril, upper=False)[0]
    term21 = _batch_trace_XXT(p._unbroadcasted_scale_tril *
                              q._unbroadcasted_cov_diag.rsqrt().unsqueeze(-1))
    term22 = _batch_trace_XXT(A.matmul(p._unbroadcasted_scale_tril))
    term2 = term21 - term22
    return 0.5 * (term1 + term2 + term3 - p.event_shape[0])
Example #18
def statdist(v):
    v = v.pop()
    with timing("statdist"):
        n = v.shape[0]
        nanguardt(v, "t_generator")
        with timing("statdist::lu_factor_torch"):
            _, v = torch.gesv(
                torch.ones([n, 1], dtype=torch.float32).to(device), v)
            del _
        nanguardt(v, "lu")
        # The last row contains 0's only.
        with timing("statdist::slices"):
            left = v[:-1, :-1]
            right = -v[:-1, -1]
            del v
        # Solves system `left * x = right`. Assumes that `left` is
        # upper-triangular (ignores lower triangle).
        #print("left shape:", left.shape, "right shape:", right.shape)
        #with timing("statdist::pytorch readback 1"):
        with timing("pytorch version"):
            res, _ = torch.trtrs(right.reshape(right.shape + (-1, )), left)
            del _
            nanguardt(res, "res")
            res = res.view(-1)
            res = torch.cat((res, torch.ones(1, device=device)))
            return nanguardt((n / torch.sum(res)), "n/sum") * res
Example #19
    def predict(self, K_xd, K_xx):
        y = K_xd @ self.alpha

        v = torch.trtrs(K_xd.t(), self.L, upper=False)[0]
        var = (K_xx - v.t() @ v).diagonal()

        return y, var
Example #20
    def backward(ctx, grad_output):
        jitter = 1.0e-8  # do i really need this?
        z, epsilon, L = ctx.saved_tensors

        dim = L.shape[0]
        g = grad_output
        loc_grad = sum_leftmost(grad_output, -1)

        identity = eye_like(g, dim)
        R_inv = torch.trtrs(identity, L.t(), transpose=False, upper=True)[0]

        z_ja = z.unsqueeze(-1)
        g_R_inv = torch.matmul(g, R_inv).unsqueeze(-2)
        epsilon_jb = epsilon.unsqueeze(-2)
        g_ja = g.unsqueeze(-1)
        diff_L_ab = 0.5 * sum_leftmost(g_ja * epsilon_jb + g_R_inv * z_ja, -2)

        Sigma_inv = torch.mm(R_inv, R_inv.t())
        V, D, _ = torch.svd(Sigma_inv + jitter)
        D_outer = D.unsqueeze(-1) + D.unsqueeze(0)

        expand_tuple = tuple([-1] * (z.dim() - 1) + [dim, dim])
        z_tilde = identity * torch.matmul(z, V).unsqueeze(-1).expand(*expand_tuple)
        g_tilde = identity * torch.matmul(g, V).unsqueeze(-1).expand(*expand_tuple)

        Y = sum_leftmost(torch.matmul(z_tilde, torch.matmul(1.0 / D_outer, g_tilde)), -2)
        Y = torch.mm(V, torch.mm(Y, V.t()))
        Y = Y + Y.t()

        Tr_xi_Y = torch.mm(torch.mm(Sigma_inv, Y), R_inv) - torch.mm(Y, torch.mm(Sigma_inv, R_inv))
        diff_L_ab += 0.5 * Tr_xi_Y
        L_grad = torch.tril(diff_L_ab)

        return loc_grad, L_grad, None
Example #21
def chol_problem(final_state, target_cost_func):
    """
    Builds a quadratic model of the target cost:
    h(y) = h(x) + <∇h(x), y-x> + 0.5 <y-x, H (y-x)>
         = const + 0.5 || L^T y - L^(-1) ∇h(x)||^2
    where, denoting the eigendecomposition ∇²h(x) = U D U^T, we set H = U |D| U^T
        (absolute values of the eigenvalues are taken) and H = L L^T (Cholesky decomposition of H).
    :param final_state: (torch.Tensor) last state on which the approximation of the cost is taken (x above)
    :param target_cost_func: (torch.nn.Module) Cost on the last state
    :return:
        chol_hess: (torch.Tensor) L above
        chol_hess_inv_grad: (torch.Tensor) L^(-1) ∇h(x) above
    """
    aux = deepcopy(final_state.data)
    aux.requires_grad = True

    target_cost = target_cost_func(aux)

    grad = torch.autograd.grad(target_cost, aux, create_graph=True)[0]
    hess = auto_jac(grad, aux)

    (lam, U) = torch.eig(hess, eigenvectors=True)
    lam = torch.abs(lam[:, 0])
    hess = torch.mm(U, torch.mm(torch.diag(lam), U.t()))

    chol_hess = torch.cholesky(hess, upper=False)
    chol_hess_inv_grad = torch.trtrs(grad, chol_hess, upper=False)[0].view(-1)
    return chol_hess, chol_hess_inv_grad
Example #22
 def train_locator_model(self, model_XTX, model_XTY, model=None):
     if model is None:
         model = torch.potrs(model_XTY, torch.potrf(model_XTX))
     else:
         for _ in range(30):
             model, _ = torch.trtrs(model_XTY - torch.mm(torch.triu(model_XTX, diagonal=1), model), torch.tril(model_XTX, diagonal=0), upper=False)
     return model
Example #23
    def joint_posterior_predictive(
            self,
            test_inputs,
            noise=False):  # assume test_inputs is a numpy array
        # get the mean and covariance of the joint Gaussian posterior over the test outputs
        test_inputs = torch.Tensor(test_inputs).type(torch.double)
        no_test = test_inputs.shape[0]
        no_inducing = self.Xm.shape[0]

        # Calculate kernel matrices
        Kxx = self.get_K(test_inputs, test_inputs)
        Kmx = self.get_K(self.Xm, test_inputs)
        Kmm = self.get_K(self.Xm, self.Xm)
        Knm = self.get_K(self.Xn, self.Xm)
        Kmn = Knm.transpose(0, 1)

        # calculate the 'inner matrix' and Cholesky decompose
        M = Kmm + torch.exp(-self.logsigman2) * Kmn @ Knm
        L = torch.potrf(M + torch.mean(torch.diag(M)) * self.jitter_factor *
                        torch.eye(no_inducing).type(torch.double),
                        upper=False)

        # Cholesky decompose the Kmm
        L_inducing = torch.potrf(
            Kmm + torch.mean(torch.diag(Kmm)) * self.jitter_factor *
            torch.eye(no_inducing).type(torch.double),
            upper=False)

        # backsolve
        LindslashKmx = torch.trtrs(Kmx, L_inducing, upper=False)[0]
        LslashKmx = torch.trtrs(Kmx, L, upper=False)[0]

        cov = Kxx - LindslashKmx.transpose(
            0, 1) @ LindslashKmx + LslashKmx.transpose(0, 1) @ LslashKmx

        if noise == True:  # add observation noise

            cov = cov + torch.exp(self.logsigman2) * torch.eye(no_test).type(
                torch.double)

        # calculate the predictive mean by backsolving
        LslashKmny = torch.trtrs(Kmn @ self.Yn, L, upper=False)[0]

        mean = torch.exp(-self.logsigman2) * LslashKmx.transpose(
            0, 1) @ LslashKmny

        return mean, cov
Example #24
    def joint_posterior_predictive(self, train_inputs, train_outputs, test_inputs, noise=False):
        # form the necessary kernel matrices
        Knn_diag = torch.exp(self.logsigmaf2)
        train_inputs_col = torch.unsqueeze(train_inputs.transpose(0, 1), 2)
        pseudoin_row = torch.unsqueeze(self.pseudoin.transpose(0, 1), 1)
        pseudoin_col = torch.unsqueeze(self.pseudoin.transpose(0, 1), 2)
        length_factors = (1. / (2. * torch.exp(self.logl2))).reshape(self.input_dim, 1, 1)
        Knm = self.get_K(train_inputs_col, pseudoin_row, length_factors)
        Kmn = Knm.transpose(0, 1)
        Kmm = self.get_K(pseudoin_col, pseudoin_row, length_factors)
        mKmm = torch.max(Kmm)

        L_Kmm = torch.potrf(Kmm + 1e-15*mKmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False)
        L_slash_Kmn = torch.trtrs(Kmn, L_Kmm, upper=False)[0]
        Lambda_diag = torch.zeros(train_outputs.shape[0], 1, device=device, dtype=torch.double)
        diag_values = Lambda_diag + torch.exp(self.logsigman2)

        Qmm = Kmm + Kmn.matmul(Knm/diag_values)
        mQmm = torch.max(Qmm)
        L_Qmm = torch.potrf(Qmm + 1e-15*mQmm*torch.eye(self.num_pseudoin, device=device, dtype=torch.double), upper=False) # 1e-4 for boston
        L_slash_y = torch.trtrs(Kmn.matmul(train_outputs.view(-1, 1)/diag_values), L_Qmm, upper=False)[0]

        no_test = test_inputs.size()[0]

        # get cross covariance between test and train points, Ktn
        test_inputs_col = torch.unsqueeze(test_inputs.transpose(0, 1), 2)
        test_inputs_row = torch.unsqueeze(test_inputs.transpose(0, 1), 1)
        Ktm = self.get_K(test_inputs_col, pseudoin_row, length_factors)
        Kmt = Ktm.transpose(0, 1)

        # get predictive mean
        LQslashKnt = torch.trtrs(Kmt, L_Qmm, upper=False)[0]
        LKslashKnt = torch.trtrs(Kmt, L_Kmm, upper=False)[0]
        pred_mean = LQslashKnt.transpose(0, 1) @ L_slash_y

        # get predictive covariance
        Ktt = self.get_K(test_inputs_col, test_inputs_row, length_factors)
        if noise:  # add observation noise
            pred_cov = Ktt + torch.exp(self.logsigman2) * torch.eye(no_test, device=device, dtype=torch.double) +\
                       LQslashKnt.transpose(0, 1) @ LQslashKnt -\
                       LKslashKnt.transpose(0, 1) @ LKslashKnt
        else:
            pred_cov = Ktt + LQslashKnt.transpose(0, 1) @ LQslashKnt -\
                       LKslashKnt.transpose(0, 1) @ LKslashKnt + 1e-6 * torch.eye(no_test, device=device, dtype=torch.double)

        return pred_mean, pred_cov
Example #25
def _batch_trtrs_lower(bb, bA):
    """
    Applies `torch.trtrs` for batches of matrices. `bb` and `bA` should have
    the same batch shape.
    """
    flat_b = bb.reshape((-1,) + bb.shape[-2:])
    flat_A = bA.reshape((-1,) + bA.shape[-2:])
    flat_X = torch.stack([torch.trtrs(b, A, upper=False)[0] for b, A in zip(flat_b, flat_A)])
    return flat_X.reshape(bb.shape)
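An illustrative call (assumed shapes): a 2 x 3 batch of unit lower-triangular systems, built element-wise so the sketch does not rely on batched torch.tril support.

import torch

torch.manual_seed(0)
n, k = 4, 2
mats = [torch.tril(torch.randn(n, n), diagonal=-1) + torch.eye(n) for _ in range(6)]
bA = torch.stack(mats).reshape(2, 3, n, n)         # batch of lower-triangular matrices
bb = torch.randn(2, 3, n, k)                       # matching batch of right-hand sides

bX = _batch_trtrs_lower(bb, bA)                    # solves bA @ bX = bb for every batch element
print(bX.shape)                                    # torch.Size([2, 3, 4, 2])
print(torch.max(torch.abs(torch.matmul(bA, bX) - bb)))   # should be close to zero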
Example #26
def cho_solve(cho_C, b):
    """Compute tensor $C^{-1} b$ from cholesky factor.

    ----
    Parameters:
        cho_C: (N x N) lower triangular tensor where cho_C cho_C^T = C
        b: (N x L) tensor
    ----
    Outputs:
        C^{-1} b
    ----
    Note:
        Gradient of potrs is not supported yet in pytorch 0.4.1
        # return torch.potrs(b, cho_C, upper=False)
    """
    tmp, _ = torch.trtrs(b, cho_C, upper=False)
    tmp2, _ = torch.trtrs(tmp, cho_C.t(), upper=True)
    return tmp2
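A short sanity check for cho_solve (illustrative only), verifying that the two triangular solves recover a solution of C x = b.

import torch

torch.manual_seed(0)
N, L = 5, 3
C = torch.randn(N, N)
C = C @ C.t() + N * torch.eye(N)          # random symmetric positive definite matrix
cho_C = torch.cholesky(C, upper=False)
b = torch.randn(N, L)

x = cho_solve(cho_C, b)                   # C^{-1} b via two triangular solves
print(torch.max(torch.abs(C @ x - b)))    # should be close to zero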
Example #27
    def compute_sorted_nearest_neighbors(global_arrays, xbar):

        # x1 - x2
        #####Xsub = (compute_expected_responses_globals.X_tensor - xbar).transpose(0, 1)
        #x_tensor = globals()[global_arrays_class_name].X_tensor
        Xsub = (global_arrays.X_tensor - xbar).transpose(0, 1)

        #####lower_diag = compute_expected_responses_globals.hyperparameters_object.upper_diag.clone().transpose(0, 1)
        hyperparameters_obj = global_arrays.hyperparameters_object
        lower_diag = hyperparameters_obj.upper_diag.clone().transpose(0, 1)
        Z = torch.trtrs(Xsub, lower_diag, upper=False)[0].transpose(0, 1)

        # mahalanobis_distances: Mahalanobis distance of each X vector to xbar.
        # L2 norm -- note: the square root is not strictly necessary since we only care about the ordering,
        # not the absolute magnitude, but this call is not a bottleneck so it is fine.
        mahalanobis_distances = torch.norm(Z, p=2, dim=1)

        ## SORT the data based on distance to xbar
        mahalanobis_distances_sorted, sorted_indices = torch.sort(mahalanobis_distances, 0)

        #####Y_sorted = compute_expected_responses_globals.Y_tensor[sorted_indices]  # now local scope
        Y_sorted = global_arrays.Y_tensor[sorted_indices]  # now local scope

        # adjust k to avoid eliminating equi-distant points
        #####k = compute_expected_responses_globals.hyperparameters_object.k
        k = global_arrays.hyperparameters_object.k
        inclusive_distance_boundary = mahalanobis_distances_sorted[k - 1] + 1e-7

        # cast to int because of weird incompatibility between zero-dim tensor and int in pytorch 0.4.0
        inclusive_k = int(np.searchsorted(mahalanobis_distances_sorted, inclusive_distance_boundary, side='right'))

        # get indices of nearest neighbors
        inclusive_k_nearest_neighbor_indices = np.arange(inclusive_k)

        '''
        # This is a template for applying a non-naive smoother to weight the nearest-neighbor points.
        # The code was functionally tested and can be used as is, except for the for loop, which should
        # be replaced with a broadcast form if the smoother in question allows it (for speed).
        weights = mahalanobis_distances[inclusive_k_nearest_neighbor_indices] / hyperparameters_object.bandwidth
        for i in inclusive_k_nearest_neighbor_indices:
            weights[i] = hyperparameters_object.smoother(weights[i])
        weights = weights / sum(weights)

        # unsqueeze(1)/view(inclusive_k, 1) for broadcast multiplication to work as expected;
        # double() needed because smoother tested (naive) spits out a float (1.0) value instead of double.
        # double() likely won't be needed if/when this actually needs to be used
        # since smoother will likely divide/multiply an existing double() and therefore return a double
        weights = weights.view(inclusive_k, 1).double()

        # E[Y|xbar], ie weighted/"smoothed" average of the Y[i,:] corresponding to the nearest inclusive_k X
        expected_response[j] = torch.sum(weights * Y_tensor[inclusive_k_nearest_neighbor_indices].view(
            inclusive_k, num_assets), 0)

        '''

        return Y_sorted[inclusive_k_nearest_neighbor_indices]
Example #28
    def loss(self, X, y, jitter, val=None):
        K = self.kernel(X, X)
        inds = list(range(len(K)))
        K[[inds], [inds]] += self.sn + jitter
        L = torch.potrf(K, upper=False)
        alpha = torch.trtrs(y, L, upper=False)[0]
        alpha = torch.trtrs(alpha, L.t(), upper=True)[0]
        loss = self.loss_func(L, alpha, y)
        if self.prior is not None:
            loss -= self.prior(self.sn)

        if val is not None:
            X_val, y_val = val
            k_star = self.kernel(X, X_val)
            mu = k_star.t() @ alpha
            mse = nn.MSELoss()(mu, y_val)
            return loss, mse
        else:
            return loss
Example #29
    def get_LL(self, train_inputs, train_outputs):
        # form the kernel matrix Knn
        Knn = self.get_K(train_inputs, train_inputs)

        # cholesky decompose
        L = torch.potrf(
            Knn +
            torch.exp(self.logsigman2) * torch.eye(train_inputs.shape[0]) +
            self.jitter * torch.eye(Knn.size()[0]),
            upper=False)  # lower triangular decomposition
        Lslashy = torch.trtrs(train_outputs, L, upper=False)[0]
        alpha = torch.trtrs(Lslashy, torch.transpose(L, 0, 1))[0]

        # get log marginal likelihood
        LL = -0.5 * torch.dot(train_outputs, torch.squeeze(alpha)) - torch.sum(
            torch.log(
                torch.diag(L))) - (train_inputs.shape[0] / 2) * torch.log(
                    torch.Tensor([2 * 3.1415926536]))
        return LL
Example #30
    def posterior(self, Xtest):
        # assumes stationary kernel

        with torch.no_grad():
            if isinstance(self.y, Sparse1DTensor):
                ix = self.get_batch.ix
                Ks = self.kernel(self.X[ix], Xtest)
                L = self.get_cov(ix)
                alpha = torch.trtrs(Ks, L, upper=False)[0]
                fmean = torch.matmul(
                    torch.t(alpha),
                    torch.trtrs(self.y.v.squeeze(), L, upper=False)[0])
            else:
                Ks = self.kernel(self.X, Xtest)
                L = self.get_cov()
                alpha = torch.trtrs(Ks, L, upper=False)[0]
                fmean = torch.matmul(torch.t(alpha),
                                     torch.trtrs(self.y, L, upper=False)[0])
            fvar = transform_forward(self.kernel.variance) - (alpha**2).sum(0)

            return fmean, fvar.reshape((-1, 1))