Ejemplo n.º 1
0
def train(ep):
    """Run one training epoch over ``X_train``, visiting samples in random order.

    For every sample, all entries but the last along the first axis form the
    model input and all entries but the first form the target. The loss is
    ``-trace(y @ log(out).T + (1 - y) @ log(1 - out).T)``, i.e. a binary
    cross-entropy summed over matching rows of target and prediction.

    Relies on the module-level names ``model``, ``optimizer``, ``X_train``,
    ``args`` (``cuda``, ``clip``, ``log_interval``) and ``lr``.

    Args:
        ep: Epoch number; only used in the progress message.
    """
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)

        # Gradients must exist before they can be clipped: clipping has to sit
        # between backward() and step(). Clipping right after zero_grad() (as
        # the code did before) only touches zeroed gradients and has no effect.
        loss.backward()
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        # NOTE(review): the trigger uses the (shuffled) sample index, not a
        # running step counter, so messages appear at irregular intervals.
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Ejemplo n.º 2
0
 def __call__(self, y_pred, y_true=None):
     """Accumulate the correlation of two projections and return the running ratio.

     ``y_pred[0]`` and ``y_pred[1]`` are the two projections. The absolute
     value of ``th_matrixcorr`` applied to their ``.data`` is taken, its trace
     is added to ``self.corr_sum`` and its first dimension to
     ``self.total_count``. ``y_true`` is accepted but not used.

     Returns:
         ``self.corr_sum / self.total_count`` after the update.
     """
     first_proj, second_proj = y_pred[0].data, y_pred[1].data
     abs_corr = th.abs(th_matrixcorr(first_proj, second_proj))
     self.corr_sum += th.trace(abs_corr)
     self.total_count += abs_corr.size(0)
     return self.corr_sum / self.total_count
Ejemplo n.º 3
0
def laplacian(x_values, target, distance="cosine", m=1, classes=10, k=None,
              extract=False, reg_l2=False):
    """Build a similarity-graph Laplacian of a batch and score labels against it.

    The rows of ``x_values`` (flattened per example) are L2-normalised and
    their pairwise dot products form a similarity matrix ``W``. For every row
    the ``k`` largest entries are kept (all of them when ``k`` is None), the
    resulting mask is symmetrised, and the Laplacian ``D - W`` of the masked
    matrix is raised to the ``m``-th matrix power.

    Args:
        x_values: Batch of features; first dimension is the example index.
        target: Variable holding integer class labels (its ``.data`` is used
            to build a one-hot matrix on the GPU).
        distance: Similarity measure. Only ``"cosine"`` is implemented.
        m: Matrix power applied to the Laplacian.
        classes: Number of columns of the one-hot label matrix.
        k: Number of neighbours kept per row, or None for all examples.
        extract: When true, return the Laplacian matrix itself.
        reg_l2: When true and ``m > 1``, divide the Laplacian by its largest
            absolute off-diagonal entry.

    Returns:
        The (powered) Laplacian when ``extract`` is true, otherwise the scalar
        ``trace(Y^T L Y)`` with ``Y`` the one-hot label matrix.

    Raises:
        ValueError: If ``distance`` is not ``"cosine"``. Previously this case
            fell through and crashed later on an unbound local variable.
    """
    # Validate before any GPU allocation so a bad argument fails fast.
    if distance != "cosine":
        raise ValueError(
            "Unsupported distance {!r}; only 'cosine' is implemented".format(
                distance))

    x_values = x_values.clone()
    target = target.clone()

    n_examples = x_values.size(0)
    x_values = x_values.view(n_examples, -1)

    # One-hot encoding of the labels, allocated directly on the GPU.
    y_true = torch.cuda.FloatTensor(n_examples, classes)
    y_true.zero_()
    y_true.scatter_(1, target.data.view(-1, 1), 1)
    y_true = Variable(y_true)

    transposed_y_true = torch.t(y_true)
    neighbours = n_examples if k is None else k

    # Cosine similarity: dot products of L2-normalised rows.
    normalized = F.normalize(x_values, p=2, dim=1)
    W_tf = torch.mm(normalized, torch.t(normalized))

    # Sorting is ascending, so the last `neighbours` columns of `ind` are the
    # indices of the largest similarities in each row.
    y, ind = torch.sort(W_tf, 1)
    A = torch.zeros(*y.size()).cuda()
    k_biggest = ind[:, -neighbours:].data
    for index1, value in enumerate(k_biggest):
        A_line = A[index1]
        A_line[value] = 1
    # Symmetrise the mask: an edge is kept if either endpoint selected it.
    A_final = Variable(torch.min(torch.ones(*y.size()).cuda(), A + torch.t(A)))
    new_W_tf = W_tf * A_final

    d_tf = torch.sum(new_W_tf, 1)
    d_tf = torch.diag(d_tf)
    laplacian_tf = (d_tf - new_W_tf)
    laplacian_after_m = laplacian_tf
    for _ in range(1, m):
        laplacian_after_m = torch.mm(laplacian_after_m, laplacian_tf)
    if reg_l2 and m > 1:
        # Normalise by the largest absolute off-diagonal entry.
        clone = torch.abs(laplacian_after_m.clone())
        mask = torch.diag(torch.ones_like(clone[0]))
        clone *= (1 - mask)
        max_val = torch.max(clone.view(-1))
        laplacian_after_m /= max_val
    if extract:
        return laplacian_after_m
    else:
        final_laplacian_tf = torch.mm(transposed_y_true, laplacian_after_m)
        final_laplacian_tf = torch.mm(final_laplacian_tf, y_true)
        final_laplacian_tf = torch.trace(final_laplacian_tf)
        return final_laplacian_tf
def IG_Loss_ZZ(netG, netIG, mb_size, Z_dim, z, use_cuda=True):
    """Compute two penalties on the composition ``netIG(netG(z))``.

    Args:
        netG: Callable applied to the latent batch.
        netIG: Callable applied to the output of ``netG``.
        mb_size: Number of rows of the random perturbation.
        Z_dim: Number of columns of the perturbation and size of the identity
            the Jacobian is compared with.
        z: Latent batch.
        use_cuda: Move the tensors created here (and ``z``) to the GPU.

    Returns:
        ``(z_hat, IG_det_penalty, IG_L)`` where ``z_hat = netIG(netG(z))``,
        ``IG_det_penalty`` is the summed squared Frobenius distance between
        each per-sample Jacobian and the identity, and ``IG_L`` penalises the
        ratio of output to input displacement for leaving ``[1, 20]``.
    """
    latent = z.cuda() if use_cuda else z
    latent = autograd.Variable(latent, requires_grad=True)

    generated = netG(latent)
    z_hat = netIG(generated)

    # Jacobian of z_hat w.r.t. the latent, one output coordinate at a time;
    # coordinate i ends up at index i of the last axis.
    jacobian_cols = []
    for i in range(z_hat.size()[1]):
        batch = generated.size()[0]
        seed = torch.ones(batch).cuda() if use_cuda else torch.ones(batch)
        col = autograd.grad(outputs=z_hat[:, i],
                            inputs=latent,
                            grad_outputs=seed,
                            create_graph=True,
                            retain_graph=True,
                            only_inputs=True)[0]
        jacobian_cols.append(col.unsqueeze(2))
    grad = torch.cat(jacobian_cols, 2)

    # Squared Frobenius norm of (Jacobian - I), accumulated over the batch.
    identity = torch.eye(Z_dim).cuda() if use_cuda else torch.eye(Z_dim)
    IG_det_penalty = 0
    for sample_jacobian in grad:
        deviation = sample_jacobian - identity
        IG_det_penalty += torch.trace(torch.mm(torch.t(deviation), deviation))

    # Displacement-ratio penalty with bounds [lb, ub].
    ub = torch.Tensor([20.0])
    lb = torch.Tensor([1.0])
    delta = torch.randn(mb_size, Z_dim)
    if use_cuda:
        ub = ub.cuda()
        lb = lb.cuda()
        delta = delta.cuda()
    eps = 0.1
    delta = (delta / delta.norm(2)) * eps  # rescale the whole batch to norm eps
    z_t = z + delta
    output_shift = torch.sqrt(torch.sum((netIG(netG(z_t)) - z_hat)**2))
    input_shift = torch.sqrt(torch.sum((z_t - z)**2))
    Q = output_shift / input_shift
    Lmax = (torch.max(Q, ub) - ub)**2
    Lmin = (torch.min(Q, lb) - lb)**2
    return z_hat, IG_det_penalty, Lmax + Lmin
Ejemplo n.º 5
0
    def make_d2Vdq(self, q, guess, q_last=None, fJHu=None):
        """Return a damped second-derivative matrix and first-derivative vector at ``q``.

        The quantity differentiated is
        ``(log f(q) - log fhat(q))**2 + 1e-9 * |q|**2`` where ``f`` and
        ``fhat`` are ``Taylor`` objects built from the values returned by
        ``self.fem.f_J_H`` (or supplied through ``fJHu``) and by
        ``self.sem.f_J_H``.

        Args:
            q: Point at which the derivatives are evaluated.
            guess: Initial guess forwarded to ``self.solve``.
            q_last: Accepted but currently not forwarded (see note below).
            fJHu: Optional precomputed ``(f, J, H, u)`` tuple; when given,
                ``self.fem.f_J_H`` is not called.

        Returns:
            ``(d2Vdq, dVdq, u.vector())``; both derivatives are negated, and
            ``d2Vdq`` is shifted along the diagonal when its smallest
            eigenvalue is negative.
        """
        # NOTE(review): `q_last` is ignored here -- `None` is always passed on.
        # Confirm whether this is intentional.
        guess = self.solve(q, guess, q_last=None)
        if fJHu is None:
            f, JV, H, u = self.fem.f_J_H(q, initial_guess=guess, return_u=True)
            J = self.fsm.to_torch(JV)
        else:
            f, J, H, u = fJHu

        fhat, Jhat, Hhat = self.sem.f_J_H(q, self.params)
        fhat = fhat.view(1)
        Jhat = Jhat.view(-1)
        Hhat = Hhat.view(len(q), len(q))

        f_taylor = Taylor(f, J, H, q)
        # NOTE(review): `H` (not `Hhat`) is passed below, which leaves `Hhat`
        # unused -- verify that this is deliberate.
        fhat_taylor = Taylor(fhat, Jhat, H, q)
        q = torch.autograd.Variable(q.data, requires_grad=True)

        # pdb.set_trace()

        def energy(q):
            """Squared log-ratio of the two expansions plus a tiny L2 term."""
            # The 1e-9 offsets keep the arguments of the logarithms away from 0.
            fnew = f_taylor(q) + 1e-9
            fhatnew = fhat_taylor(q) + 1e-9
            # = f/g + g/f
            # d/du = (f'g - fg')/g^2 + (g'f - gf')/f^2
            # d2/du =

            # pdb.set_trace()
            return (torch.log(fnew) -
                    torch.log(fhatnew))**2 + 1e-9 * (q**2).sum(dim=-1)

        # pdb.set_trace()
        # len(q) independent copies of q, stacked as rows: differentiating the
        # trace of the row-wise gradients w.r.t. these copies yields one
        # second-derivative row per copy.
        qs = torch.stack([
            torch.autograd.Variable(q.data, requires_grad=True)
            for _ in range(len(q))
        ])

        energies = energy(qs)
        grads = torch.autograd.grad(energies.sum(), qs,
                                    create_graph=True)[0].contiguous()
        # All rows are copies of the same point, so the first row suffices.
        dVdq = -grads[0].data.clone()

        d2Vdq = (-torch.autograd.grad(torch.trace(grads),
                                      qs)[0].contiguous().view(len(q), len(q)))

        # Apply damping: shift the spectrum so the smallest eigenvalue becomes
        # at least 1e-3.
        # NOTE(review): `torch.eye` below is created on the default device --
        # confirm `d2Vdq` always lives on the CPU.
        evals = torch.symeig(d2Vdq).eigenvalues
        if torch.min(evals) < 0:
            d2Vdq = d2Vdq + torch.eye(
                len(dVdq)) * (1e-3 + torch.abs(torch.min(evals)))

        return d2Vdq, dVdq, u.vector()
Ejemplo n.º 6
0
def test_jacobian_plowrank():
    """Check ``PMatLowRank`` against its dense representation on every nonlinear task.

    For each task the diagonal, Frobenius norm, trace, matrix-vector product,
    quadratic form, regularised solve and scalar multiplication of the
    low-rank object are compared with the same quantities computed from the
    tensor returned by ``get_dense_tensor``.
    """
    for make_task in nonlinear_tasks:
        loader, lc, parameters, model, function, n_output = make_task()
        jacobian_generator = Jacobian(layer_collection=lc,
                                      model=model,
                                      function=function,
                                      n_output=n_output)
        lowrank = PMatLowRank(generator=jacobian_generator, examples=loader)

        # Random unit direction in parameter space.
        direction = random_pvector(lc, device=device)
        direction = direction / direction.norm()
        flat_direction_of = direction.get_flat_representation
        dense = lowrank.get_dense_tensor()

        # Diagonal
        check_tensors(torch.diag(dense), lowrank.get_diag(), eps=1e-4)

        # Frobenius norm
        frob_lowrank = lowrank.frobenius_norm()
        frob_dense = (dense**2).sum()**.5
        check_ratio(frob_dense, frob_lowrank)

        # Trace
        trace_lowrank = lowrank.trace()
        trace_dense = torch.trace(dense)
        check_ratio(trace_lowrank, trace_dense)

        # Matrix-vector product
        mv_dense = torch.mv(dense, flat_direction_of())
        mv_lowrank = lowrank.mv(direction)
        check_tensors(mv_dense, mv_lowrank.get_flat_representation())

        # Quadratic form v^T M v
        check_ratio(torch.dot(mv_dense, flat_direction_of()),
                    lowrank.vTMv(direction))

        # Solve: try to recover M v, which lies in the span of the low-rank
        # matrix, from M (M v).
        regul = 1e-3
        double_mv = lowrank.mv(mv_lowrank)
        recovered = lowrank.solve(double_mv, regul=regul)
        check_tensors(mv_lowrank.get_flat_representation(),
                      recovered.get_flat_representation(),
                      eps=1e-2)
        # TODO: test inv

        # Scalar multiplication (add and sub are not covered yet)
        check_tensors(1.23 * lowrank.get_dense_tensor(),
                      (1.23 * lowrank).get_dense_tensor())
Ejemplo n.º 7
0
    def eval_obs(self, state, env_c4v):
        r"""
        :param state: wavefunction
        :param env_c4v: CTM c4v symmetric environment
        :type state: IPEPS
        :type env_c4v: ENV_C4V
        :return: expectation values of observables, labels of observables
        :rtype: list[float], list[str]

        Evaluates, without tracking gradients, the following observables and
        returns them in this order

            1. magnetization ``m``
            2. one value per entry of ``self.obs_ops``, in the order of that
               mapping (the entries ``sz``, ``sp`` and ``sm`` are required)
            3. ``SS2x1``, the contraction of the 2x1 reduced density matrix
               with ``self.h2_rot``

        Each on-site observable is :math:`\mathrm{Tr}(\rho_{1x1} O)` and the
        magnetization is computed as

        .. math::

            m = \sqrt{\left| \langle S^z \rangle^2
                + \langle S^+ \rangle \langle S^- \rangle \right|}
        """
        obs = dict()
        with torch.no_grad():
            # On-site observables from the 1x1 reduced density matrix.
            rdm1x1 = rdm_c4v.rdm1x1(state, env_c4v)
            obs.update({
                f"{label}": torch.trace(rdm1x1 @ op)
                for label, op in self.obs_ops.items()
            })
            obs["m"] = sqrt(abs(obs["sz"]**2 + obs["sp"] * obs["sm"]))

            # Two-site term from the 2x1 reduced density matrix.
            rdm2x1 = rdm_c4v.rdm2x1(state, env_c4v)
            obs["SS2x1"] = torch.einsum('ijab,ijab', rdm2x1, self.h2_rot)

        # Labels and values are returned as two parallel lists.
        obs_labels = ["m", *(f"{key}" for key in self.obs_ops.keys()), "SS2x1"]
        obs_values = [obs[name] for name in obs_labels]
        return obs_values, obs_labels
Ejemplo n.º 8
0
    def rbf_cmmd(self, sX, tX, sY, tY):
        """Return the CMMD score computed from ``self.guassian_kernel`` matrices.

        The kernel is evaluated on ``(sX, tX)`` and on ``(sY, tY)``. Both
        results are split into blocks at the index ``sX.size(0)``, the diagonal
        X blocks are regularised with ``self.eplison`` times the block size and
        inverted, and three trace terms are summed.

        Returns:
            The square root of the summed trace terms.
        """
        n_src = sX.size(0)
        n_tgt = tX.size(0)
        dev = sX.device
        split = sX.size(0)
        kernel_args = dict(kernel_mul=self.kernel_mul,
                           kernel_num=self.kernel_num,
                           fix_sigma=self.fix_sigma)
        kx = self.guassian_kernel(sX, tX, **kernel_args)
        ky = self.guassian_kernel(sY, tY, **kernel_args)

        # Block decomposition of both kernel matrices.
        kx_ss, kx_ts, kx_tt = kx[:split, :split], kx[split:, :split], kx[split:, split:]
        ky_ss, ky_st, ky_tt = ky[:split, :split], ky[:split, split:], ky[split:, split:]

        # Regularised inverses of the diagonal X blocks.
        kx_ss_inv = torch.inverse(
            kx_ss + self.eplison * n_src * torch.eye(n_src).to(dev))
        kx_tt_inv = torch.inverse(
            kx_tt + self.eplison * n_tgt * torch.eye(n_tgt).to(dev))

        cross_term = -2.0 / (n_src * n_tgt) * torch.trace(
            kx_ts.mm(kx_ss_inv).mm(ky_st).mm(kx_tt_inv))
        src_term = 1.0 / (n_src * n_src) * torch.trace(ky_ss.mm(kx_ss_inv))
        tgt_term = 1.0 / (n_tgt * n_tgt) * torch.trace(ky_tt.mm(kx_tt_inv))

        return torch.sqrt(cross_term + src_term + tgt_term)
Ejemplo n.º 9
0
    def _torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
        """Compute the Frechet distance between two Gaussians on the GPU.

        For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) the value returned is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)),
        with the matrix square root obtained from 50 iterations of
        ``sqrt_newton_schulz``.

        Params:
        -- mu1, mu2      : Mean vectors as numpy arrays of equal shape.
        -- sigma1, sigma2: Covariance matrices as numpy arrays of equal shape.
        -- eps           : Accepted but not used by this implementation.
        Returns:
        --   : The Frechet distance as a CUDA tensor.
        """
        assert mu1.shape == mu2.shape, \
          'Training and test mean vectors have different lengths'
        assert sigma1.shape == sigma2.shape, \
          'Training and test covariances have different dimensions'
        import torch

        # Move all four inputs to the GPU.
        mean_a, cov_a, mean_b, cov_b = (
            torch.from_numpy(arr).cuda()
            for arr in (mu1, sigma1, mu2, sigma2))

        mean_gap = mean_a - mean_b
        # The square-root routine receives a leading batch axis of size one.
        batched_product = cov_a.mm(cov_b).unsqueeze(0)
        cov_sqrt = sqrt_newton_schulz(batched_product, 50).squeeze()
        return (mean_gap.dot(mean_gap) + torch.trace(cov_a) +
                torch.trace(cov_b) - 2 * torch.trace(cov_sqrt))
Ejemplo n.º 10
0
def quad_approx(theta, prev_theta, S, step_size):
    """Test whether ``f(theta)`` is bounded by a quadratic model around ``prev_theta``.

    The model is
    ``Q = f(prev) + trace((theta - prev) @ (S - prev^-1)) + |theta - prev|_F^2 / (2 * step_size)``
    where ``f`` is ``get_f_theta(., S)``.

    Returns:
        The comparison ``f(theta) <= Q`` when it holds; otherwise whether
        ``| |f(theta)| - |Q| |`` is within 1% of ``|f(theta)|``.
    """
    f_theta = get_f_theta(theta, S)
    step = theta - prev_theta

    constant_term = get_f_theta(prev_theta, S)
    linear_term = torch.trace(
        torch.matmul(step, S - torch.inverse(prev_theta)))
    quadratic_term = 1 / (2 * step_size) * torch.sum(step**2)  # squared Frobenius norm
    Q_eta = constant_term + linear_term + quadratic_term

    below_bound = f_theta <= Q_eta
    if below_bound:
        return below_bound
    # Fall back to accepting a difference within 1%.
    gap = torch.abs(torch.abs(f_theta) - torch.abs(Q_eta))
    return gap <= 0.01 * torch.abs(f_theta)
Ejemplo n.º 11
0
    def compute_loss(self, X, labels):
        """Fit kernel ridge coefficients for ``X`` and return the regularised loss.

        The kernel matrix ``self.kernel(X, X, self.W)`` is stored in
        ``self.kern``; the ridge system ``(kern + lambda_reg * I) alpha = Y``
        is solved through a Cholesky factorisation with ``Y`` the one-hot
        encoding of ``labels`` (10 classes) and the solution is stored in
        ``self.alpha``.

        Args:
            X: Inputs handed to ``self.kernel``.
            labels: Integer class labels accepted by ``F.one_hot``.

        Returns:
            ``self.loss_fn(kern.T @ alpha, Y) + lambda_reg * trace(alpha.T @ kern @ alpha)``.
        """
        self.kern = self.kernel(X, X, self.W)
        # Build the ridge term directly on the kernel's device/dtype instead of
        # relying on a module-level `device` that may differ from it.
        n = self.kern.size()[0]
        ridge = torch.eye(n, dtype=self.kern.dtype, device=self.kern.device)
        K = self.kern + ridge * self.lambda_reg

        L = torch.cholesky(K, upper=False)
        # `.type(torch.FloatTensor)` would pin the targets to the CPU and break
        # the solve for GPU kernels, so follow K instead.
        one_hot_y = F.one_hot(labels, num_classes=10).to(dtype=K.dtype,
                                                         device=K.device)
        self.alpha = torch.cholesky_solve(one_hot_y, L, upper=False)
        # Predictions use the unregularised kernel.
        output = self.kern.T @ self.alpha
        loss = self.loss_fn(output, one_hot_y) + self.lambda_reg * torch.trace(
            self.alpha.T @ self.kern @ self.alpha)
        return loss
Ejemplo n.º 12
0
def Obsv(psi, Lpsi, T, O, beta):
    r"""Thermal average of the observable ``O``, i.e. <I \otimes O \otimes I>_K.

    ``K`` is the matrix returned by ``density_matrix(Lpsi, act(T, psi))`` and
    plays the role of an effective Hamiltonian. The average is evaluated in
    its eigenbasis with weights ``exp(beta * w)``; the eigenvalues are shifted
    by their maximum first, which leaves the ratio unchanged.

    Args:
        psi: cMPS (right eigenvector); supplies dtype, device and ``dim``.
        Lpsi: cMPS (left eigenvector).
        T: cMPO.
        O: The observable, a square matrix.
        beta: Inverse temperature.

    Returns:
        The thermal average as a Python number.
    """
    dtype, device = psi.dtype, psi.device
    full_dim = O.shape[0]*psi.dim*psi.dim
    identity = torch.eye(psi.dim, dtype=dtype, device=device)
    # Embed O between two identities and flatten to a square matrix.
    embedded = torch.einsum('ab,cd,ef->acebdf', identity, O, identity)
    matO = embedded.contiguous().view(full_dim, full_dim)

    M = density_matrix(Lpsi, act(T, psi))

    w, v = eigensolver(M)
    w -= w.max().item()
    weights = torch.diag(torch.exp(beta*w))
    numerator = torch.trace(weights @ v.t() @ matO @ v).item()
    denominator = torch.trace(weights).item()
    return numerator / denominator
Ejemplo n.º 13
0
def dtaudq(p, dH, Q, lam, alpha):
    """Return the vector with entries ``-0.5 * trace(M @ dH[i])``.

    ``M = Q D J D Q^T`` with ``J = J(lam, alpha, len(p))`` and
    ``D = D(p, Q, lam, alpha)``.

    Args:
        p: Sequence whose length fixes the size of the result.
        dH: Stack of matrices indexed along its first axis.
        Q: Matrix used to rotate ``D J D``.
        lam, alpha: Forwarded to ``J`` and ``D``.

    Returns:
        A tensor of length ``len(p)``.
    """
    n = len(p)
    j_mat = J(lam, alpha, n)
    d_mat = D(p, Q, lam, alpha)

    # M = Q (D (J (D Q^T))), multiplied from the right-most factor inwards.
    right = torch.mm(d_mat, torch.t(Q))
    right = torch.mm(j_mat, right)
    right = torch.mm(d_mat, right)
    M = torch.mm(Q, right)

    delta = torch.zeros(n)
    for i in range(n):
        product = torch.mm(M, dH[i, :, :])
        delta[i] = 0.5 * torch.trace(-product)

    return delta
Ejemplo n.º 14
0
    def find_grad(self, s, s0, lap):
        """Return the gradient w.r.t. ``s`` of a cut term plus a proximity term.

        The differentiated value is
        ``trace(softmax(s)^T @ lap @ softmax(s)) + ||s0 - s||^2``.
        Gradients are enabled locally and computed on detached copies, so the
        inputs themselves are left untouched. The shapes of ``s`` and ``lap``
        are printed.
        """
        with torch.enable_grad():
            s_cp = s.clone().detach().requires_grad_(True)
            print(s_cp.shape)
            s0_cp = s0.clone().detach().requires_grad_(True)
            print(lap.shape)

            # Cut term: trace(P^T L P) with P = softmax(s).
            assign_t = torch.transpose(self.softmax(s_cp), -1, -2)
            lap_assign = torch.matmul(lap, self.softmax(s_cp))
            cut = torch.trace(torch.matmul(assign_t, lap_assign))

            # Squared distance to the reference point.
            distance = torch.norm(s0_cp - s_cp, dim=(-1, -2))
            norm_reg = torch.pow(distance, 2)

            (cut + norm_reg).backward()

        return s_cp.grad
Ejemplo n.º 15
0
 def _evalSumAcrossTrials(self, Kzz, KzzChol, qMu, qSigma):
     # ESS \in nTrials x nInd x nInd
     ESS = qSigma + torch.matmul(qMu, qMu.permute(0, 2, 1))
     nTrials = qMu.shape[0]
     answer = 0
     for trial in range(nTrials):
         _, logdetKzz = Kzz[trial, :, :].slogdet()  # O(n^3)
         _, logdetQSigma = qSigma[trial, :, :].slogdet()  # O(n^3)
         traceTerm = torch.trace(
             torch.cholesky_solve(ESS[trial, :, :], KzzChol[trial, :, :]))
         trialKL = .5 * (traceTerm + logdetKzz - logdetQSigma -
                         ESS.shape[1])
         answer += trialKL
     return answer
Ejemplo n.º 16
0
def mmd(x, y, B, alpha=1):
    """Return an MMD score between two image batches using an RBF kernel.

    Args:
        x: Tensor of shape [B, 1, IMG_DIM, IMG_DIM] (e.g. real images).
        y: Tensor of shape [B, 1, IMG_DIM, IMG_DIM] (e.g. fake images).
        B: Batch size; must be the same for ``x`` and ``y``.
        alpha: Kernel parameter in ``exp(-alpha * squared_distance)``.

    Returns:
        The within-sample kernel sums (diagonals excluded) weighted by
        ``1 / (B * (B - 1))`` minus the cross-sample kernel sum weighted by
        ``2 / B**2``.
    """
    flat_x = x.view(x.size(0), x.size(2) * x.size(3))
    flat_y = y.view(y.size(0), y.size(2) * y.size(3))

    # Gram matrices of inner products.
    gram_xx = torch.mm(flat_x, flat_x.t())
    gram_yy = torch.mm(flat_y, flat_y.t())
    gram_xy = torch.mm(flat_x, flat_y.t())

    # Squared norms broadcast along rows; with the transposes they turn the
    # inner products into squared distances.
    sq_x = gram_xx.diag().unsqueeze(0).expand_as(gram_xx)
    sq_y = gram_yy.diag().unsqueeze(0).expand_as(gram_yy)

    kernel_xx = torch.exp(- alpha * (sq_x.t() + sq_x - 2*gram_xx))
    kernel_yy = torch.exp(- alpha * (sq_y.t() + sq_y - 2*gram_yy))
    kernel_xy = torch.exp(- alpha * (sq_x.t() + sq_y - 2*gram_xy))

    within_weight = (1./(B*(B-1)))
    cross_weight = (2./(B*B))
    within = (torch.sum(kernel_xx)-torch.trace(kernel_xx)
              + torch.sum(kernel_yy)-torch.trace(kernel_yy))
    return within_weight * within - cross_weight * torch.sum(kernel_xy)
Ejemplo n.º 17
0
def KL_cal(a, mu, Q, eigen):
    """Accumulate weighted per-sample KL-type terms over ``N`` components.

    For every component ``idx`` and sample ``i`` a matrix
    ``Q diag(eigen) Q^-1`` is formed and compared with a zero-mean,
    identity-covariance reference; the resulting value is multiplied by
    ``a[i, idx]`` and summed.

    Relies on the module-level names ``N``, ``z_dim``, ``mb_size`` and
    ``batch_matmul``.

    Args:
        a: Weights indexed as ``a[sample, component]``.
        mu: Means indexed as ``mu[:, component, :]``.
        Q: Matrices indexed as ``Q[:, component, :, :]``.
        eigen: Diagonal entries indexed as ``eigen[:, component, :]``.

    Returns:
        The accumulated sum over all components and samples.
    """
    #print("a",a)

    KL = 0
    for idx in range(N):
        #idx= 0
        this_mu = mu[:, idx, :].view(-1, z_dim)
        this_eigen = eigen[:, idx, :].view(-1, z_dim)
        this_Q = Q[:, idx, :, :].view(-1, z_dim,
                                      z_dim)  # reshaped to (-1, z_dim, z_dim)
        diag_list = []

        # One diagonal matrix per sample, built from that sample's entries.
        for i in range(mb_size):
            diag_list.append(
                torch.diag(this_eigen[i, :]).view(1, z_dim, z_dim))

        m_diag = torch.cat(diag_list, 0)  # (mb_size, z_dim, z_dim)
        # var = Q diag(eigen) Q^-1
        mul1 = batch_matmul(this_Q, m_diag)
        var = batch_matmul(mul1, torch.inverse(this_Q))

        # Reference distribution: zero mean, identity covariance.
        p_mu = torch.zeros([mb_size, z_dim])
        p_sigma = torch.cat([
            torch.diag(torch.ones([z_dim], dtype=torch.float32)).view(
                [1, z_dim, z_dim])
        ] * mb_size, 0)

        ans = []
        for i in range(mb_size):
            _sigma0, _sigma1, _mu0, _mu1 = var[i, :, :], p_sigma[
                i, :, :], this_mu[i, :].view(1,
                                             z_dim), p_mu[i, :].view(1, z_dim)

            #print(torch.inverse(_sigma1))
            # NOTE(review): this is an element-wise product, not a matrix
            # product; the trace is the same only because _sigma1 is the
            # identity here.
            kl1 = torch.trace(torch.inverse(_sigma1) * _sigma0)

            #print(_mu0.shape)
            #print((_mu1-_mu0).transpose(1,0).shape)
            #print(_sigma1.shape)
            mul1 = torch.mm((_mu1 - _mu0), torch.inverse(_sigma1))  # (1, z_dim)
            d1 = torch.det(_sigma1)
            d0 = torch.det(_sigma0)
            d = torch.log(d1 / d0)
            # NOTE(review): the Gaussian KL formula subtracts the
            # dimensionality; `N` is the loop bound over components --
            # confirm that `N` rather than `z_dim` is intended.
            kl2 = torch.mm(mul1, (_mu1 - _mu0).transpose(1, 0)) - N + d

            kl = 0.5 * (kl1 + kl2)
            # NOTE(review): the weighting is done under no_grad, so the
            # appended values carry no gradient -- verify this is intended.
            with torch.no_grad():
                ans.append(kl * a[i, idx])

        KL += torch.sum(torch.cat(ans, 0))
    return KL
Ejemplo n.º 18
0
def frechet_distance(mu_x, mu_y, sigma_x, sigma_y):
    """
    Return the Fréchet distance between two multivariate Gaussians given by
    their means and covariance matrices:
    ``||mu_x - mu_y||^2 + Tr(sigma_x + sigma_y - 2 * sqrt(sigma_x @ sigma_y))``.
    Parameters:
        mu_x: the mean of the first Gaussian, (n_features)
        mu_y: the mean of the second Gaussian, (n_features)
        sigma_x: the covariance matrix of the first Gaussian, (n_features, n_features)
        sigma_y: the covariance matrix of the second Gaussian, (n_features, n_features)
    """
    mean_term = torch.norm(mu_x - mu_y)**2
    cross_sqrt = matrix_sqrt(sigma_x @ sigma_y)
    covariance_term = torch.trace(sigma_x + sigma_y - 2 * cross_sqrt)
    return mean_term + covariance_term
Ejemplo n.º 19
0
 def per_component_m_step(i):
     """Compute updated parameters for mixture component ``i``.

     Uses ``x``, ``r``, ``r_sum``, ``N``, ``d``, ``l`` and ``self`` from the
     enclosing scope.

     Returns:
         ``(mu_i, A_i_new, log_D_i_new)``: the responsibility-weighted mean,
         the new factor matrix, and the log of the new isotropic variance
         broadcast to the shape of ``self.log_D[i]``.
     """
     resp = r[:, [i]]
     resp_total = r_sum[i]
     A_i = self.A[i]

     mu_i = torch.sum(resp * x, dim=0) / resp_total
     s2_I = torch.exp(self.log_D[i, 0]) * torch.eye(l, device=x.device)
     inv_M_i = torch.inverse(A_i.T @ A_i + s2_I)

     # Responsibility-weighted covariance times A, without forming the
     # covariance matrix itself.
     x_c = x - mu_i.reshape(1, d)
     SiAi = (1.0 / resp_total) * (resp * x_c).T @ (x_c @ A_i)
     A_i_new = SiAi @ torch.inverse(s2_I + inv_M_i @ A_i.T @ SiAi)

     explained = torch.trace(A_i_new.T @ (SiAi @ inv_M_i))
     trace_S_i = torch.sum(N / resp_total *
                           torch.mean(resp * x_c * x_c, dim=0))
     sigma_2_new = (trace_S_i - explained) / d
     log_D_i_new = torch.log(sigma_2_new) * torch.ones_like(self.log_D[i])
     return mu_i, A_i_new, log_D_i_new
Ejemplo n.º 20
0
def time_test(N):
    """Time the forward and backward pass of ``trace(A @ A^T)`` for a random N x N ``A``.

    The measured interval includes the creation of ``A``.

    Returns:
        Elapsed wall-clock time in seconds.
    """
    began = time.perf_counter()

    matrix = torch.randn((N, N), requires_grad=True)
    gram_trace = torch.trace(matrix @ torch.transpose(matrix, 0, 1))
    gram_trace.backward()

    return time.perf_counter() - began
Ejemplo n.º 21
0
def angle_mat(R1, R2):
    """
    Angle of the relative rotation ``R1^T @ R2`` for every pair in the batch,
    obtained from ``cos(theta) = (trace - 1) / 2`` (clamped to [-1, 1]).

    :param R1: B x 3 x 3, :type torch.float
    :param R2: B x 3 x 3, :type torch.float
    :return: B, angle in degrees, :type torch.float
    """
    relative = R1.transpose(1, 2) @ R2

    angles = torch.zeros(R1.shape[0]).to(R1.device)
    for i, rotation in enumerate(relative):
        cos_theta = (torch.trace(rotation) - 1) / 2
        # Clamp so rounding error cannot push acos outside its domain.
        cos_theta = cos_theta.clamp(min=-1.0, max=1.0)
        angles[i] = rad2deg(torch.acos(cos_theta))

    return angles
Ejemplo n.º 22
0
    def forward(self, x):
        """Return a penalty on the correlation between the parts of ``x``.

        ``x`` is reshaped to ``(batch, nparts, -1)`` and every part vector is
        L2-normalised. Entry ``(i, j)`` of the correlation matrix is the mean
        over all sample pairs of the dot product between part ``i`` and part
        ``j``. The result is
        ``gamma * (sum(C) - 3 * trace(C) + 2 * nparts) / 2``.

        All tensors created here follow the device of ``x`` rather than being
        pinned to the current CUDA device.

        Args:
            x: Batch of features whose trailing size is divisible by
                ``self.nparts``.

        Returns:
            The scaled penalty on the device of ``x``.
        """
        device = x.device
        corr_matrix = torch.zeros(self.nparts, self.nparts, device=device)

        x = x.reshape(x.size(0), self.nparts, -1)
        x = torch.div(x, x.norm(dim=-1, keepdim=True))

        for i in range(self.nparts):
            for j in range(self.nparts):
                corr_matrix[i, j] = torch.mean(torch.mm(x[:, i], x[:, j].t()))

        loss = (torch.sum(corr_matrix) - 3 * torch.trace(corr_matrix) + 2 * self.nparts) / 2.0

        return torch.mul(loss, self.gamma.to(device))
Ejemplo n.º 23
0
def loss_dc_whitened(embd, label, weight=None):
    """Return ``D - trace(sum_b (V^T V)^-1 V^T Y (Y^T Y)^-1 Y^T V) / B``.

    ``V`` is ``embd`` and ``Y`` is ``label``; both are batched matrices whose
    last dimension is the embedding size ``D`` and the label size ``C``.

    Args:
        embd: Tensor indexed as (batch, item, D).
        label: Tensor indexed as (batch, item, C).
        weight: Optional per-item weights; when a tensor is given, both inputs
            are scaled by its square root (broadcast over the last axis).

    Returns:
        A scalar tensor.
    """
    # isinstance also accepts Tensor subclasses, which the exact type
    # comparison silently ignored.
    if isinstance(weight, torch.Tensor):
        weight = torch.sqrt(weight).unsqueeze(-1)
        embd = embd * weight
        label = label * weight
    C = label.shape[2]
    D = embd.shape[2]
    # Create the regulariser identities next to the data; a default CPU
    # tensor cannot be added to GPU inputs.
    eye_D = torch.eye(D, dtype=embd.dtype, device=embd.device)
    eye_C = torch.eye(C, dtype=label.dtype, device=label.device)
    VtV = embd.transpose(1, 2).bmm(embd) + 1e-24 * eye_D
    VtY = embd.transpose(1, 2).bmm(label)
    YtY = label.transpose(1, 2).bmm(label) + 1e-24 * eye_C
    return D - torch.trace(torch.sum(
        VtV.inverse().bmm(VtY).bmm(YtY.inverse()).bmm(VtY.transpose(1, 2)),
        dim=0
    )) / embd.shape[0]
Ejemplo n.º 24
0
Archivo: sdne.py Proyecto: zrt/cogdl
    def forward(self, adj_mat, l_mat):
        """Encode and decode ``adj_mat`` and return the loss terms.

        The output of the second encoder layer is stored in
        ``self.embedding``.

        Returns:
            ``(alpha * L_1st, L_2nd, alpha * L_1st + L_2nd, L_reg)`` where
            ``L_1st = 2 * trace(E^T @ l_mat @ E)`` for the embedding ``E``,
            ``L_2nd`` is the sum of squares of
            ``(adj_mat - reconstruction) * adj_mat * beta`` and ``L_reg`` adds
            ``nu1 * |p|_1 + nu2 * |p|_2^2`` over all parameters.
        """
        # Two-layer encoder followed by a two-layer decoder.
        hidden = F.leaky_relu(self.encode0(adj_mat))
        hidden = F.leaky_relu(self.encode1(hidden))
        self.embedding = hidden
        recon = F.leaky_relu(self.decode0(hidden))
        recon = F.leaky_relu(self.decode1(recon))

        emb = self.embedding
        projected = torch.mm(torch.mm(torch.t(emb), l_mat), emb)
        L_1st = 2 * torch.trace(projected)

        weighted_error = (adj_mat - recon) * adj_mat * self.beta
        L_2nd = torch.sum(weighted_error * weighted_error)

        L_reg = 0
        for weight in self.parameters():
            l1_part = self.nu1 * torch.sum(torch.abs(weight))
            l2_part = self.nu2 * torch.sum(weight * weight)
            L_reg += l1_part + l2_part
        return self.alpha * L_1st, L_2nd, self.alpha * L_1st + L_2nd, L_reg
Ejemplo n.º 25
0
    def forward(ctx, input):
        """Return ``trace(expm(input))`` and save ``expm(input)`` for backward.

        The matrix exponential is computed on the CPU through ``expm`` and the
        result is moved back to the device of ``input``.

        Args:
            ctx: Context object; ``save_for_backward`` is called on it with
                the matrix exponential.
            input: Square matrix.

        Returns:
            The trace of the matrix exponential, on the device of ``input``.
        """
        with torch.no_grad():
            # send tensor to cpu in numpy format and compute expm using scipy
            expm_input = expm(input.detach().cpu().numpy())
            # Transform back into a tensor on the input's device. The result
            # of `as_tensor` on a numpy array lives on the CPU, so without
            # this move CUDA inputs could never satisfy the old
            # `assert expm_input.is_cuda` check.
            expm_input = torch.as_tensor(expm_input).to(input.device)
            # save expm_input to use in backward
            ctx.save_for_backward(expm_input)

            # return the trace
            return torch.trace(expm_input)
Ejemplo n.º 26
0
    def criterion(self, x, y):
        """
        Defines the HSIC criterion.

        Both inputs are moved to ``self.device``, their kernel matrices are
        centred from the right with ``H = I - 1/m`` and the result is
        ``trace(K_x H K_y H) / (m - 1)``, where ``m`` is the number of rows
        of ``x``.
        """
        x, y = x.to(self.device), y.to(self.device)
        m = x.shape[0]

        kernel_x = kernel_module.get(self.kernel)(x, x, self.params_dict)
        kernel_y = kernel_module.get(self.kernel)(y, y, self.params_dict)

        centering = (torch.eye(m, m) - (1 / m) * torch.ones(m, m)).to(self.device)
        centred_x = torch.mm(kernel_x, centering)
        centred_y = torch.mm(kernel_y, centering)

        scale = 1 / (m - 1)
        return scale * torch.trace(torch.mm(centred_x, centred_y))
Ejemplo n.º 27
0
def kl_div(output_mu, output_sig, target_mu, target_sig, device):
    """ Loss built from the inverse covariances of two Gaussians
    [Lots of numerical issues]

    The mean term is multiplied by 0.0 and the trace term is squared and
    scaled by 1e-24, so the returned value is
    ``0.0 * loss1 + 1e-24 * loss2**2``.

    Parameters:
        output_mu (1d tensor)  -- mean of the variables
        output_sig (2d tensor) -- covariance of the variables
        target_mu (1d tensor)  -- target mean
        target_sig (2d tensor) -- target covariance
        device (int)           -- device of the arrays above (not used here)
    """
    output_precision = torch.inverse(output_sig)
    target_precision = torch.inverse(target_sig)

    # Quadratic form of the mean difference under the summed precisions.
    mean_gap = output_mu - target_mu
    loss1 = torch.dot(mean_gap,
                      torch.mv(output_precision + target_precision, mean_gap))

    # Trace of the product of the precisions, taken in both orders.
    forward_trace = torch.trace(output_precision.mm(target_precision))
    backward_trace = torch.trace(target_precision.mm(output_precision))
    loss2 = forward_trace + backward_trace

    return 0.0 * loss1 + 1e-24 * torch.pow(loss2, 2)
Ejemplo n.º 28
0
def get_duality_gap(theta, S, rho):
    """Return the sum of four terms built from ``theta``, ``S`` and ``rho``.

    With ``U`` the element-wise clipping of ``theta^-1 - S`` to
    ``[-rho, rho]``, the result is
    ``-logdet(S + U) - args.N - logdet(theta) + trace(S @ theta) + rho * sum(|theta|)``.

    Uses the module-level ``args.N`` and ``get_logdet``.
    """
    # Clip theta^-1 - S element-wise to [-rho, rho].
    residual = torch.inverse(theta) - S
    U = torch.min(torch.max(residual, -1 * rho), rho)

    dual_logdet_term = -1 * get_logdet(S + U) - args.N
    primal_logdet_term = -1 * get_logdet(theta)
    trace_term = torch.trace(torch.matmul(S, theta))
    # Entry-wise L1 norm (sum of absolute values), not the induced matrix norm.
    l1_term = rho * torch.sum(torch.abs(theta))

    return dual_logdet_term + primal_logdet_term + trace_term + l1_term
Ejemplo n.º 29
0
def wasserstein_penalty_func(p, q):
    """Return a Wasserstein-style penalty between the row statistics of ``p`` and ``q``.

    Means and covariances are taken along dimension 1. The result is
    ``|mean_p - mean_q|^2 + tr(cov_p) + tr(cov_q) - 2 * tr(sqrt(relu(cov_p @ cov_q)))``
    where the square root is applied element-wise.
    """
    def cov(x: torch.Tensor) -> torch.Tensor:
        # Unbiased covariance between rows, observations along dim 1.
        centered = x - torch.mean(x, dim=1, keepdim=True)
        scale = 1. / (centered.size(1) - 1)
        return scale * centered.matmul(centered.t())

    mean_p = p.mean(dim=1)
    cov_p = cov(p)
    mean_q = q.mean(dim=1)
    cov_q = cov(q)

    mean_term = torch.sum(torch.pow(mean_p - mean_q, 2))
    cross = torch.matmul(cov_p, cov_q).relu().sqrt()
    cross_term = 2 * torch.trace(cross)
    return mean_term + cov_p.trace() + cov_q.trace() - cross_term
Ejemplo n.º 30
0
 def build_loss(self, recons, weights, raw_weights):
     """Return a divergence term plus a graph-Laplacian term on the embedding.

     The first term is ``raw_weights * log(raw_weights / recons + 1e-10)``
     summed over dimension 1 and averaged. The second is
     ``lam * trace(E^T L E) / size`` with ``L = diag(weights.sum(1)) - weights``,
     ``E = self.embedding`` and ``size`` the number of rows of ``self.X``.
     """
     size = self.X.shape[0]

     # Divergence between the raw weights and their reconstruction.
     pointwise = raw_weights * torch.log(raw_weights / recons + 10**-10)
     loss = pointwise.sum(dim=1).mean()

     # Laplacian of the weight graph.
     L = torch.diag(weights.sum(dim=1)) - weights
     smoothness = torch.trace(self.embedding.t().matmul(L).matmul(
         self.embedding))
     loss += self.lam * smoothness / size
     return loss
Ejemplo n.º 31
0
def dphidq(lam, alpha, dH, Q, dV):
    """Return the vector with entries ``0.5 * trace(M @ dH[i]) + dV[i]``.

    ``M = Q (R * J) Q^T`` where ``J = J(lam, alpha, len(lam))``,
    ``R = diag(1 / (lam * coth(alpha * lam)))`` and ``*`` is the element-wise
    product.

    Args:
        lam: Vector whose length fixes the size of the result.
        alpha: Scalar forwarded to ``J`` and ``coth_torch``.
        dH: Stack of matrices indexed along its first axis.
        Q: Matrix used to rotate ``R * J``.
        dV: Vector added entry by entry.

    Returns:
        A tensor of length ``len(lam)``.
    """
    n = len(lam)
    j_mat = J(lam, alpha, n)
    R = torch.diag(1/(lam*coth_torch(alpha*lam)))
    rotated = torch.mm(R*j_mat, torch.t(Q))
    M = torch.mm(Q, rotated)

    delta = torch.zeros(n)
    for i in range(n):
        half_trace = 0.5 * torch.trace(torch.mm(M, dH[i, :, :]))
        delta[i] = half_trace + dV[i]
    return delta
Ejemplo n.º 32
0
    def forward(self) -> Tuple[torch.Tensor, int]:
        """Return a value proportional to the negative log likelihood.

        With ``self.fiml`` set, observations are processed per
        missing-data pattern: every ``(observations, available)`` pair
        contributes ``logdet(sigma_sub) * n_obs + trace(d sigma_sub^-1 d^T)``
        (clamped at zero), where ``sigma_sub`` is the model covariance
        restricted to the available variables and ``d`` the corresponding
        mean differences. Otherwise the value is
        ``logdet(sigma) + trace(S sigma^-1)`` with ``S`` the sample covariance.

        Returns:
            The loss and the number of negative (unstable) contributions.
        """
        unstable = 0

        # model-implied covariance/mean
        sigma, mu = self.implied_sigma_mu()

        if not self.fiml:
            # maximum likelihood
            sigma_inv = torch.inverse(sigma)
            loss = torch.logdet(sigma) + torch.trace(
                self.sample_covariance.mm(sigma_inv))
            unstable += loss.detach().item() < 0
            return loss, unstable

        # FIML -2 * logL (without constants)
        assert mu is not None
        loss = torch.zeros(1, dtype=mu.dtype, device=mu.device)
        mean_diffs: torch.Tensor = self.data - mu.t()
        for pairs in self.missing_patterns.values():
            # calculate sigma^-1 and logdet(sigma) in batches
            sub_sigmas = torch.stack([
                sigma.index_select(0, pattern[1]).index_select(1, pattern[1])
                for pattern in pairs
            ])
            logdets = torch.logdet(sub_sigmas)
            precisions = torch.inverse(sub_sigmas)

            for i, (observations, available) in enumerate(pairs):
                rows = mean_diffs.index_select(0, observations)
                mean_diff = rows.index_select(1, available)
                quadratic = torch.trace(
                    mean_diff.mm(precisions[i]).mm(mean_diff.t()))
                loss_current = logdets[i] * len(observations) + quadratic
                unstable += loss_current.detach().item() < 0
                loss = loss + loss_current.clamp(min=0.0)

        return loss, unstable
Ejemplo n.º 33
0
def evaluate(X_data):
    """Compute, print and return the average loss of ``model`` on ``X_data``.

    Every sample is split into an input (all entries but the last along the
    first axis) and a target (all entries but the first). The per-sample loss
    is ``-trace(y @ log(out).T + (1 - y) @ log(1 - out).T)``; the summed loss
    is divided by the total number of output rows.

    Uses the module-level ``model`` and ``args.cuda``.
    """
    model.eval()
    total_loss = 0.0
    count = 0
    for idx in np.arange(len(X_data), dtype="int32"):
        sequence = X_data[idx]
        x = Variable(sequence[:-1])
        y = Variable(sequence[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        output = model(x.unsqueeze(0)).squeeze(0)
        log_on = torch.log(output).float().t()
        log_off = torch.log(1-output).float().t()
        loss = -torch.trace(torch.matmul(y, log_on) +
                            torch.matmul((1-y), log_off))

        total_loss += loss.data[0]
        count += output.size(0)

    eval_loss = total_loss / count
    print("Validation/Test loss: {:.5f}".format(eval_loss))
    return eval_loss