def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.item()  # .item() replaces the removed .data[0]
        count += output.size(0)
        loss.backward()
        # clip only after backward(), once gradients actually exist
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
def __call__(self, y_pred, y_true=None):
    """
    y_pred should be two projections
    """
    covar_mat = th.abs(th_matrixcorr(y_pred[0].data, y_pred[1].data))
    self.corr_sum += th.trace(covar_mat)
    self.total_count += covar_mat.size(0)
    return self.corr_sum / self.total_count
def laplacian(x_values, target, distance="cosine", m=1, classes=10, k=None,
              extract=False, reg_l2=False):
    x_values = x_values.clone()
    target = target.clone()
    n_examples = x_values.size(0)
    x_values = x_values.view(n_examples, -1)
    y_true = torch.cuda.FloatTensor(n_examples, classes)
    y_true.zero_()
    y_true.scatter_(1, target.data.view(-1, 1), 1)
    y_true = Variable(y_true)
    transposed_y_true = torch.t(y_true)
    if k is None:
        neighbours = n_examples
    else:
        neighbours = k
    if distance == "cosine":
        normalized = F.normalize(x_values, p=2, dim=1)
        W_tf = torch.mm(normalized, torch.t(normalized))
    y, ind = torch.sort(W_tf, 1)
    A = torch.zeros(*y.size()).cuda()
    k_biggest = ind[:, -neighbours:].data
    for index1, value in enumerate(k_biggest):
        A_line = A[index1]
        A_line[value] = 1
    A_final = Variable(torch.min(torch.ones(*y.size()).cuda(), A + torch.t(A)))
    new_W_tf = W_tf * A_final
    d_tf = torch.sum(new_W_tf, 1)
    d_tf = torch.diag(d_tf)
    laplacian_tf = (d_tf - new_W_tf)
    laplacian_after_m = laplacian_tf
    for _ in range(1, m):
        laplacian_after_m = torch.mm(laplacian_after_m, laplacian_tf)
    if reg_l2 and m > 1:
        clone = torch.abs(laplacian_after_m.clone())
        mask = torch.diag(torch.ones_like(clone[0]))
        clone *= (1 - mask)
        max_val = torch.max(clone.view(-1))
        laplacian_after_m /= max_val
    if extract:
        return laplacian_after_m
    else:
        final_laplacian_tf = torch.mm(transposed_y_true, laplacian_after_m)
        final_laplacian_tf = torch.mm(final_laplacian_tf, y_true)
        final_laplacian_tf = torch.trace(final_laplacian_tf)
        return final_laplacian_tf
def IG_Loss_ZZ(netG, netIG, mb_size, Z_dim, z, use_cuda=True):
    interpolates = z
    if use_cuda:
        interpolates = interpolates.cuda()
    interpolates = autograd.Variable(interpolates, requires_grad=True)
    G = netG(interpolates)
    z_hat = netIG(G)
    zN = z_hat.size()[1]
    #grad_list = []
    for i in range(zN):
        z_ = z_hat[:, i]
        ##print(pixel.size())
        gradients = autograd.grad(outputs=z_, inputs=interpolates,
                                  grad_outputs=torch.ones(G.size()[0]).cuda()
                                  if use_cuda else torch.ones(G.size()[0]),
                                  create_graph=True, retain_graph=True,
                                  only_inputs=True)[0]
        gradients = gradients.unsqueeze(2)
        if i == 0:
            grad = gradients
        else:
            grad = torch.cat((grad, gradients), 2)

    IG_det_penalty = 0
    ImN = grad.size()[0]
    for i in range(ImN):
        m = grad[i, :, :]
        teye = torch.eye(Z_dim).cuda() if use_cuda else torch.eye(Z_dim)
        tmp = m - teye
        IG_det_penalty += torch.trace(torch.mm(torch.t(tmp), tmp))

    """======== IG eigen penalty """
    ub = torch.Tensor([20.0])
    lb = torch.Tensor([1.0])  #Variable(
    if use_cuda:
        ub = ub.cuda()
        lb = lb.cuda()
    delta = torch.randn(mb_size, Z_dim)  # Variable()
    if use_cuda:
        delta = delta.cuda()
    eps = 0.1
    delta = (delta / delta.norm(2)) * eps
    z_t = z + delta
    Q = torch.sqrt(torch.sum((netIG(netG(z_t)) - z_hat)**2)) / \
        torch.sqrt(torch.sum((z_t - z)**2))
    Lmax = (torch.max(Q, ub) - ub)**2
    Lmin = (torch.min(Q, lb) - lb)**2
    IG_L = Lmax + Lmin
    return z_hat, IG_det_penalty, IG_L
def make_d2Vdq(self, q, guess, q_last=None, fJHu=None):
    guess = self.solve(q, guess, q_last=None)
    if fJHu is None:
        f, JV, H, u = self.fem.f_J_H(q, initial_guess=guess, return_u=True)
        J = self.fsm.to_torch(JV)
    else:
        f, J, H, u = fJHu
    fhat, Jhat, Hhat = self.sem.f_J_H(q, self.params)
    fhat = fhat.view(1)
    Jhat = Jhat.view(-1)
    Hhat = Hhat.view(len(q), len(q))

    f_taylor = Taylor(f, J, H, q)
    fhat_taylor = Taylor(fhat, Jhat, H, q)

    q = torch.autograd.Variable(q.data, requires_grad=True)

    # pdb.set_trace()
    def energy(q):
        fnew = f_taylor(q) + 1e-9
        fhatnew = fhat_taylor(q) + 1e-9
        # = f/g + g/f
        # d/du = (f'g - fg')/g^2 + (g'f - gf')/f^2
        # d2/du =
        # pdb.set_trace()
        return (torch.log(fnew) - torch.log(fhatnew))**2 + 1e-9 * (q**2).sum(dim=-1)

    # pdb.set_trace()
    qs = torch.stack([
        torch.autograd.Variable(q.data, requires_grad=True) for _ in range(len(q))
    ])
    energies = energy(qs)
    grads = torch.autograd.grad(energies.sum(), qs, create_graph=True)[0].contiguous()
    dVdq = -grads[0].data.clone()
    d2Vdq = (-torch.autograd.grad(torch.trace(grads), qs)[0].contiguous().view(
        len(q), len(q)))

    # Apply damping
    evals = torch.symeig(d2Vdq).eigenvalues
    if torch.min(evals) < 0:
        d2Vdq = d2Vdq + torch.eye(len(dVdq)) * (1e-3 + torch.abs(torch.min(evals)))
    return d2Vdq, dVdq, u.vector()
def test_jacobian_plowrank():
    for get_task in nonlinear_tasks:
        loader, lc, parameters, model, function, n_output = get_task()
        generator = Jacobian(layer_collection=lc,
                             model=model,
                             function=function,
                             n_output=n_output)
        PMat_lowrank = PMatLowRank(generator=generator, examples=loader)
        dw = random_pvector(lc, device=device)
        dw = dw / dw.norm()
        dense_tensor = PMat_lowrank.get_dense_tensor()

        # Test get_diag
        check_tensors(torch.diag(dense_tensor),
                      PMat_lowrank.get_diag(),
                      eps=1e-4)

        # Test frobenius
        frob_PMat = PMat_lowrank.frobenius_norm()
        frob_direct = (dense_tensor**2).sum()**.5
        check_ratio(frob_direct, frob_PMat)

        # Test trace
        trace_PMat = PMat_lowrank.trace()
        trace_direct = torch.trace(dense_tensor)
        check_ratio(trace_PMat, trace_direct)

        # Test mv
        mv_direct = torch.mv(dense_tensor, dw.get_flat_representation())
        mv = PMat_lowrank.mv(dw)
        check_tensors(mv_direct, mv.get_flat_representation())

        # Test vTMV
        check_ratio(torch.dot(mv_direct, dw.get_flat_representation()),
                    PMat_lowrank.vTMv(dw))

        # Test solve
        # We will try to recover mv, which is in the span of the
        # low rank matrix
        regul = 1e-3
        mmv = PMat_lowrank.mv(mv)
        mv_using_inv = PMat_lowrank.solve(mmv, regul=regul)
        check_tensors(mv.get_flat_representation(),
                      mv_using_inv.get_flat_representation(), eps=1e-2)
        # Test inv TODO

        # Test add, sub, rmul
        check_tensors(1.23 * PMat_lowrank.get_dense_tensor(),
                      (1.23 * PMat_lowrank).get_dense_tensor())
def eval_obs(self, state, env_c4v):
    r"""
    :param state: wavefunction
    :param env_c4v: CTM c4v symmetric environment
    :type state: IPEPS
    :type env_c4v: ENV_C4V
    :return: expectation values of observables, labels of observables
    :rtype: list[float], list[str]

    Computes the following observables in order

        1. magnetization
        2. :math:`\langle S^z \rangle,\ \langle S^+ \rangle,\ \langle S^- \rangle`

    where the on-site magnetization is defined as

    .. math::

        \begin{align*}
        m &= \sqrt{ \langle S^z \rangle^2+\langle S^x \rangle^2+\langle S^y \rangle^2 }
           = \sqrt{\langle S^z \rangle^2+1/4(\langle S^+ \rangle+\langle S^- \rangle)^2
           -1/4(\langle S^+\rangle-\langle S^-\rangle)^2} \\
          &= \sqrt{\langle S^z \rangle^2 + 1/2\langle S^+ \rangle \langle S^- \rangle}
        \end{align*}

    Usual spin components can be obtained through the following relations

    .. math::

        \begin{align*}
        S^+ &= S^x+iS^y                 & S^x &= 1/2(S^+ + S^-)\\
        S^- &= S^x-iS^y\ \Rightarrow\   & S^y &= -i/2(S^+ - S^-)
        \end{align*}
    """
    # TODO optimize/unify ?
    # expect "list" of (observable label, value) pairs ?
    obs = dict()
    with torch.no_grad():
        rdm1x1 = rdm_c4v.rdm1x1(state, env_c4v)
        for label, op in self.obs_ops.items():
            obs[f"{label}"] = torch.trace(rdm1x1 @ op)
        obs[f"m"] = sqrt(abs(obs[f"sz"]**2 + obs[f"sp"] * obs[f"sm"]))

        rdm2x1 = rdm_c4v.rdm2x1(state, env_c4v)
        obs[f"SS2x1"] = torch.einsum('ijab,ijab', rdm2x1, self.h2_rot)

    # prepare list with labels and values
    obs_labels = [f"m"] + [f"{lc}" for lc in self.obs_ops.keys()] + [f"SS2x1"]
    obs_values = [obs[label] for label in obs_labels]
    return obs_values, obs_labels
def rbf_cmmd(self, sX, tX, sY, tY):
    '''
    Return CMMD score based on a Gaussian kernel.
    '''
    n_sample1 = sX.size(0)
    n_sample2 = tX.size(0)
    device = sX.device
    batch_size = sX.size(0)
    xkernels = self.guassian_kernel(sX, tX, kernel_mul=self.kernel_mul,
                                    kernel_num=self.kernel_num,
                                    fix_sigma=self.fix_sigma)
    ykernels = self.guassian_kernel(sY, tY, kernel_mul=self.kernel_mul,
                                    kernel_num=self.kernel_num,
                                    fix_sigma=self.fix_sigma)

    X11 = xkernels[:batch_size, :batch_size]
    X21 = xkernels[batch_size:, :batch_size]
    X22 = xkernels[batch_size:, batch_size:]

    Y11 = ykernels[:batch_size, :batch_size]
    Y12 = ykernels[:batch_size, batch_size:]
    Y22 = ykernels[batch_size:, batch_size:]

    X11_inver = torch.inverse(X11 + self.eplison * n_sample1 * torch.eye(n_sample1).to(device))
    X22_inver = torch.inverse(X22 + self.eplison * n_sample2 * torch.eye(n_sample2).to(device))

    cmmd1 = -2.0 / (n_sample1 * n_sample2) * torch.trace(
        X21.mm(X11_inver).mm(Y12).mm(X22_inver))
    cmmd2 = 1.0 / (n_sample1 * n_sample1) * torch.trace(Y11.mm(X11_inver))
    cmmd3 = 1.0 / (n_sample2 * n_sample2) * torch.trace(Y22.mm(X22_inver))
    loss = cmmd1 + cmmd2 + cmmd3
    return torch.sqrt(loss)
def _torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Pytorch implementation of the Frechet Distance.
    Taken from https://github.com/bioinf-jku/TTUR
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.
    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on an
               representive data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on an
               representive data set.
    Returns:
    --   : The Frechet Distance.
    """
    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    import torch
    mu1 = torch.from_numpy(mu1).cuda()
    sigma1 = torch.from_numpy(sigma1).cuda()
    mu2 = torch.from_numpy(mu2).cuda()
    sigma2 = torch.from_numpy(sigma2).cuda()

    diff = mu1 - mu2
    # Run 50 itrs of newton-schulz to get the matrix sqrt of sigma1 dot sigma2
    covmean = sqrt_newton_schulz(sigma1.mm(sigma2).unsqueeze(0), 50).squeeze()
    out = (diff.dot(diff) + torch.trace(sigma1) + torch.trace(sigma2) -
           2 * torch.trace(covmean))
    return out
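# `sqrt_newton_schulz` above is referenced but not defined here. A hedged sketch of
# the standard batched Newton-Schulz iteration for a matrix square root (assumes the
# input is square and well-conditioned enough to converge after Frobenius
# normalization); not necessarily identical to the helper used by the snippet above.
import torch

def sqrt_newton_schulz(A, num_iters):
    # A: (batch, dim, dim)
    batch, dim, _ = A.shape
    norm_a = A.mul(A).sum(dim=(1, 2)).sqrt()          # Frobenius norm per matrix
    Y = A / norm_a.view(batch, 1, 1)
    I = torch.eye(dim, dtype=A.dtype, device=A.device).repeat(batch, 1, 1)
    Z = torch.eye(dim, dtype=A.dtype, device=A.device).repeat(batch, 1, 1)
    for _ in range(num_iters):
        T = 0.5 * (3.0 * I - Z.bmm(Y))
        Y = Y.bmm(T)                                  # converges to sqrt(A / ||A||_F)
        Z = T.bmm(Z)
    return Y * norm_a.sqrt().view(batch, 1, 1)        # undo the normalization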
def quad_approx(theta, prev_theta, S, step_size):
    f_theta = get_f_theta(theta, S)
    qt1 = get_f_theta(prev_theta, S)  # term 1 of Q
    qt2 = torch.trace(
        torch.matmul(theta - prev_theta, S - torch.inverse(prev_theta)))
    qt3 = 1 / (2 * step_size) * torch.sum(
        (theta - prev_theta)**2)  # get_frobenius_norm
    Q_eta = qt1 + qt2 + qt3
    # print('QUAD approx: ', f_theta, Q_eta)
    #return f_theta <= Q_eta
    #return f_theta <= Q_eta or torch.abs(torch.abs(f_theta) - torch.abs(Q_eta)) <= 0.01 * torch.abs(f_theta)  # difference within 1%
    return f_theta <= Q_eta or torch.abs(
        torch.abs(f_theta) - torch.abs(Q_eta)) <= 0.01 * torch.abs(f_theta)  # difference within 1%
def compute_loss(self, X, labels):
    self.kern = self.kernel(X, X, self.W)
    K = self.kern + torch.eye(self.kern.size()[0]).to(device) * self.lambda_reg
    L = torch.cholesky(K, upper=False)
    one_hot_y = F.one_hot(labels, num_classes=10).type(torch.FloatTensor)
    # A, _ = torch.solve(kern, L)
    # V, _ = torch.solve(one_hot_y, L)
    # alpha = A.T @ V
    self.alpha = torch.cholesky_solve(one_hot_y, L, upper=False)
    # output = K.T @ self.alpha
    output = self.kern.T @ self.alpha
    loss = (self.loss_fn(output, one_hot_y)
            + self.lambda_reg * torch.trace(self.alpha.T @ self.kern @ self.alpha))
    return loss
def Obsv(psi, Lpsi, T, O, beta):
    """ calculate the thermal average of the observable O
        as <I \otimes O \otimes I>_{K}
        where K is the K-matrix corresponding to <Lpsi|T|psi>
        K plays the role of effective Hamiltonian

        T:    cMPO
        psi:  cMPS (right eigenvector)
        Lpsi: cMPS (left eigenvector)
        O:    the observable
        beta: inverse temperature
    """
    dtype, device = psi.dtype, psi.device
    totalD = O.shape[0] * psi.dim * psi.dim
    matI = torch.eye(psi.dim, dtype=dtype, device=device)
    matO = torch.einsum('ab,cd,ef->acebdf', matI, O, matI).contiguous().view(totalD, totalD)

    Tpsi = act(T, psi)
    M = density_matrix(Lpsi, Tpsi)
    w, v = eigensolver(M)
    w -= w.max().item()
    expw = torch.diag(torch.exp(beta * w))
    return torch.trace(expw @ v.t() @ matO @ v).item() / torch.trace(expw).item()
def dtaudq(p, dH, Q, lam, alpha):
    N = len(p)
    Jm = J(lam, alpha, len(p))
    #print("eigenvalues {}".format(lam))
    #print("J {}".format(Jm))
    Dm = D(p, Q, lam, alpha)
    #print("D {}".format(Dm))
    M = torch.mm(Q, torch.mm(Dm, torch.mm(Jm, torch.mm(Dm, torch.t(Q)))))
    #print("M is {}".format(M))
    delta = torch.zeros(N)
    for i in range(N):
        delta[i] = 0.5 * torch.trace(-torch.mm(M, dH[i, :, :]))
    return delta
def find_grad(self, s, s0, lap):
    with torch.enable_grad():
        s_cp = s.clone().detach().requires_grad_(True)
        print(s_cp.shape)
        s0_cp = s0.clone().detach().requires_grad_(True)
        print(lap.shape)
        cut = torch.trace(
            torch.matmul(torch.transpose(self.softmax(s_cp), -1, -2),
                         torch.matmul(lap, self.softmax(s_cp))))
        norm_reg = torch.pow(torch.norm(s0_cp - s_cp, dim=(-1, -2)), 2)
        val = cut + norm_reg
        val.backward()
    return s_cp.grad
def _evalSumAcrossTrials(self, Kzz, KzzChol, qMu, qSigma):
    # ESS \in nTrials x nInd x nInd
    ESS = qSigma + torch.matmul(qMu, qMu.permute(0, 2, 1))
    nTrials = qMu.shape[0]
    answer = 0
    for trial in range(nTrials):
        _, logdetKzz = Kzz[trial, :, :].slogdet()        # O(n^3)
        _, logdetQSigma = qSigma[trial, :, :].slogdet()  # O(n^3)
        traceTerm = torch.trace(
            torch.cholesky_solve(ESS[trial, :, :], KzzChol[trial, :, :]))
        trialKL = .5 * (traceTerm + logdetKzz - logdetQSigma - ESS.shape[1])
        answer += trialKL
    return answer
def mmd(x, y, B, alpha=1):
    ###
    # Input:
    #   x = tensor of shape [B, 1, IMG_DIM, IMG_DIM] (e.g. real images)
    #   y = tensor of shape [B, 1, IMG_DIM, IMG_DIM] (e.g. fake images)
    #   B = batch size (or size of samples to be compared); B(x) has to be B(y)
    #   alpha = kernel parameter
    #
    # Output:
    #   mmd score
    ###
    x = x.view(x.size(0), x.size(2) * x.size(3))
    y = y.view(y.size(0), y.size(2) * y.size(3))

    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())

    rx = (xx.diag().unsqueeze(0).expand_as(xx))
    ry = (yy.diag().unsqueeze(0).expand_as(yy))

    K = torch.exp(- alpha * (rx.t() + rx - 2 * xx))
    L = torch.exp(- alpha * (ry.t() + ry - 2 * yy))
    P = torch.exp(- alpha * (rx.t() + ry - 2 * zz))

    beta = (1. / (B * (B - 1)))
    gamma = (2. / (B * B))

    mmd = beta * (torch.sum(K) - torch.trace(K) + torch.sum(L) - torch.trace(L)) - gamma * torch.sum(P)
    return mmd
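# Hedged usage sketch for the `mmd` function above (assumes it is in scope).
# The batch size, image size and alpha value are illustrative assumptions; two
# batches drawn from the same distribution should give a score close to zero.
import torch

B = 64
real = torch.randn(B, 1, 28, 28)
fake = torch.randn(B, 1, 28, 28)
print(mmd(real, fake, B, alpha=1).item())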
def KL_cal(a, mu, Q, eigen):
    #print("a",a)
    KL = 0
    for idx in range(N):
        #idx= 0
        this_mu = mu[:, idx, :].view(-1, z_dim)
        this_eigen = eigen[:, idx, :].view(-1, z_dim)
        this_Q = Q[:, idx, :, :].view(-1, z_dim, z_dim)  #(batch,1,latent,latent)
        diag_list = []
        for i in range(mb_size):
            diag_list.append(
                torch.diag(this_eigen[i, :]).view(1, z_dim, z_dim))
        m_diag = torch.cat(diag_list, 0)  #(batch,20)
        mul1 = batch_matmul(this_Q, m_diag)
        var = batch_matmul(mul1, torch.inverse(this_Q))
        p_mu = torch.zeros([mb_size, z_dim])
        p_sigma = torch.cat([
            torch.diag(torch.ones([z_dim], dtype=torch.float32)).view(
                [1, z_dim, z_dim])
        ] * mb_size, 0)
        ans = []
        for i in range(mb_size):
            _sigma0, _sigma1 = var[i, :, :], p_sigma[i, :, :]
            _mu0, _mu1 = this_mu[i, :].view(1, z_dim), p_mu[i, :].view(1, z_dim)
            #print(torch.inverse(_sigma1))
            kl1 = torch.trace(torch.inverse(_sigma1) * _sigma0)
            #print(_mu0.shape)
            #print((_mu1-_mu0).transpose(1,0).shape)
            #print(_sigma1.shape)
            mul1 = torch.mm((_mu1 - _mu0), torch.inverse(_sigma1))  #(1,20)
            d1 = torch.det(_sigma1)
            d0 = torch.det(_sigma0)
            d = torch.log(d1 / d0)
            kl2 = torch.mm(mul1, (_mu1 - _mu0).transpose(1, 0)) - N + d
            kl = 0.5 * (kl1 + kl2)
            with torch.no_grad():
                ans.append(kl * a[i, idx])
        KL += torch.sum(torch.cat(ans, 0))
    return KL
def frechet_distance(mu_x, mu_y, sigma_x, sigma_y):
    """
    Function for returning the Fréchet distance between multivariate Gaussians,
    parameterized by their means and covariance matrices.
    Parameters:
        mu_x: the mean of the first Gaussian, (n_features)
        mu_y: the mean of the second Gaussian, (n_features)
        sigma_x: the covariance matrix of the first Gaussian, (n_features, n_features)
        sigma_y: the covariance matrix of the second Gaussian, (n_features, n_features)
    """
    res = torch.norm(mu_x - mu_y)**2 + torch.trace(
        sigma_x + sigma_y - 2 * matrix_sqrt(sigma_x @ sigma_y))
    return res
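# `matrix_sqrt` above is assumed to be defined elsewhere. A minimal sketch for a
# symmetric positive semi-definite input via eigendecomposition; note that
# sigma_x @ sigma_y is not symmetric in general, so FID implementations typically
# use scipy.linalg.sqrtm or a Newton-Schulz iteration rather than this shortcut.
import torch

def matrix_sqrt_sym(mat):
    # Clamp tiny negative eigenvalues caused by round-off before taking the sqrt.
    vals, vecs = torch.linalg.eigh(mat)
    return vecs @ torch.diag(vals.clamp(min=0).sqrt()) @ vecs.t()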
def per_component_m_step(i):
    mu_i = torch.sum(r[:, [i]] * x, dim=0) / r_sum[i]
    s2_I = torch.exp(self.log_D[i, 0]) * torch.eye(l, device=x.device)
    inv_M_i = torch.inverse(self.A[i].T @ self.A[i] + s2_I)
    x_c = x - mu_i.reshape(1, d)
    SiAi = (1.0 / r_sum[i]) * (r[:, [i]] * x_c).T @ (x_c @ self.A[i])
    invM_AT_Si_Ai = inv_M_i @ self.A[i].T @ SiAi
    A_i_new = SiAi @ torch.inverse(s2_I + invM_AT_Si_Ai)
    t1 = torch.trace(A_i_new.T @ (SiAi @ inv_M_i))
    trace_S_i = torch.sum(N / r_sum[i] * torch.mean(r[:, [i]] * x_c * x_c, dim=0))
    sigma_2_new = (trace_S_i - t1) / d
    return mu_i, A_i_new, torch.log(sigma_2_new) * torch.ones_like(self.log_D[i])
def time_test(N):
    start_time = time.perf_counter()
    A = torch.randn((N, N), requires_grad=True)
    At = torch.transpose(A, 0, 1)
    T = torch.trace(A @ At)
    T.backward()
    stop_time = time.perf_counter()
    return stop_time - start_time
def angle_mat(R1, R2):
    """
    :param R1: B x 3 x 3, :type torch.float
    :param R2: B x 3 x 3, :type torch.float
    :return: B, angle in degrees, :type torch.float
    """
    R_d = R1.transpose(1, 2) @ R2
    angles = torch.zeros(R1.shape[0]).to(R1.device)
    for i, i_R_d in enumerate(R_d):
        c = (torch.trace(i_R_d) - 1) / 2
        angles[i] = rad2deg(torch.acos(c.clamp(min=-1.0, max=1.0)))
    return angles
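# Hedged numeric check of the trace formula above: two rotations about the z-axis
# that differ by 30 degrees. torch.rad2deg stands in for the module's rad2deg helper.
import math
import torch

def _rot_z(deg):
    t = math.radians(deg)
    return torch.tensor([[math.cos(t), -math.sin(t), 0.0],
                         [math.sin(t),  math.cos(t), 0.0],
                         [0.0,          0.0,         1.0]])

R1, R2 = _rot_z(10.0), _rot_z(40.0)
c = (torch.trace(R1.t() @ R2) - 1) / 2
print(torch.rad2deg(torch.acos(c.clamp(min=-1.0, max=1.0))))  # ~30.0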
def forward(self, x):
    corr_matrix = torch.zeros(self.nparts, self.nparts).cuda()
    loss = torch.zeros(1, requires_grad=True).cuda()

    x = x.reshape(x.size(0), self.nparts, -1)
    x = torch.div(x, x.norm(dim=-1, keepdim=True))
    for i in range(self.nparts):
        for j in range(self.nparts):
            corr_matrix[i, j] = torch.mean(torch.mm(x[:, i], x[:, j].t()))

    loss = (torch.sum(corr_matrix) - 3 * torch.trace(corr_matrix) + 2 * self.nparts) / 2.0
    return torch.mul(loss, self.gamma.cuda())
def loss_dc_whitened(embd, label, weight=None):
    if type(weight) == torch.Tensor:
        weight = torch.sqrt(weight).unsqueeze(-1)
        embd = embd * weight
        label = label * weight
    C = label.shape[2]
    D = embd.shape[2]
    VtV = embd.transpose(1, 2).bmm(embd) + 1e-24 * torch.eye(D)
    VtY = embd.transpose(1, 2).bmm(label)
    YtY = label.transpose(1, 2).bmm(label) + 1e-24 * torch.eye(C)
    return D - torch.trace(torch.sum(
        VtV.inverse().bmm(VtY).bmm(YtY.inverse()).bmm(VtY.transpose(1, 2)),
        dim=0
    )) / embd.shape[0]
def forward(self, adj_mat, l_mat):
    t0 = F.leaky_relu(self.encode0(adj_mat))
    t0 = F.leaky_relu(self.encode1(t0))
    self.embedding = t0
    t0 = F.leaky_relu(self.decode0(t0))
    t0 = F.leaky_relu(self.decode1(t0))
    L_1st = 2 * torch.trace(
        torch.mm(torch.mm(torch.t(self.embedding), l_mat), self.embedding))
    L_2nd = torch.sum(((adj_mat - t0) * adj_mat * self.beta) *
                      ((adj_mat - t0) * adj_mat * self.beta))
    L_reg = 0
    for param in self.parameters():
        L_reg += self.nu1 * torch.sum(torch.abs(param)) + \
            self.nu2 * torch.sum(param * param)
    return self.alpha * L_1st, L_2nd, self.alpha * L_1st + L_2nd, L_reg
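# Hedged numeric check of the first-order term above: for a symmetric adjacency W
# with unnormalized Laplacian L = D - W, 2 * tr(Y^T L Y) equals
# sum_ij W_ij * ||y_i - y_j||^2, i.e. it penalizes distant embeddings of connected
# nodes. Sizes below are illustrative assumptions only.
import torch

n, d = 6, 4
W = torch.rand(n, n)
W = (W + W.t()) / 2
W.fill_diagonal_(0)
L = torch.diag(W.sum(dim=1)) - W
Y = torch.randn(n, d)

lhs = 2 * torch.trace(Y.t() @ L @ Y)
rhs = (W * torch.cdist(Y, Y) ** 2).sum()
print(torch.allclose(lhs, rhs, atol=1e-4))  # True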
def forward(ctx, input):
    with torch.no_grad():
        # send tensor to cpu in numpy format and compute expm using scipy
        expm_input = expm(input.detach().cpu().numpy())
        # transform back into a tensor
        expm_input = torch.as_tensor(expm_input)
        if input.is_cuda:
            # move the result back to the GPU so the assert below holds
            expm_input = expm_input.cuda()
            assert expm_input.is_cuda
        # save expm_input to use in backward
        ctx.save_for_backward(expm_input)
        # return the trace
        return torch.trace(expm_input)
def criterion(self, x, y):
    """
    Defines the HSIC criterion.
    """
    x, y = x.to(self.device), y.to(self.device)
    m = x.shape[0]
    K_x = kernel_module.get(self.kernel)(x, x, self.params_dict)
    K_y = kernel_module.get(self.kernel)(y, y, self.params_dict)
    H = torch.eye(m, m) - (1 / m) * torch.ones(m, m)
    H = H.to(self.device)
    matrix_x = torch.mm(K_x, H)
    matrix_y = torch.mm(K_y, H)
    return (1 / (m - 1)) * torch.trace(torch.mm(matrix_x, matrix_y))
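# Self-contained hedged sketch of the same biased estimator with an explicit RBF
# kernel, since `kernel_module.get(...)` above is defined elsewhere; the kernel
# choice, bandwidth, and 1/(m-1) scaling mirror the criterion above, not a library API.
import torch

def hsic_sketch(x, y, gamma=1.0):
    m = x.shape[0]
    K_x = torch.exp(-gamma * torch.cdist(x, x) ** 2)
    K_y = torch.exp(-gamma * torch.cdist(y, y) ** 2)
    H = torch.eye(m) - torch.ones(m, m) / m   # centering matrix
    return torch.trace(K_x @ H @ K_y @ H) / (m - 1)

# x, y = torch.randn(64, 10), torch.randn(64, 5); print(hsic_sketch(x, y).item())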
def kl_div(output_mu, output_sig, target_mu, target_sig, device):
    """
    Compute the KL-divergence between 2 Gaussian distributions
    [Lots of numerical issues]

    Parameters:
        output_mu (1d tensor)  -- mean of the variables
        output_sig (2d tensor) -- covariance of the variables
        target_mu (1d tensor)  -- target mean
        target_sig (2d tensor) -- target covariance
        device (int)           -- device of the arrays above
    """
    output_sig_inv = torch.inverse(output_sig)
    target_sig_inv = torch.inverse(target_sig)
    loss1 = torch.dot(output_mu - target_mu,
                      torch.mv(output_sig_inv + target_sig_inv, output_mu - target_mu))
    loss2 = torch.trace(output_sig_inv.mm(target_sig_inv)) \
        + torch.trace(target_sig_inv.mm(output_sig_inv))
    loss = 0.0 * loss1 + 1e-24 * torch.pow(loss2, 2)
    return loss
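# For reference, a hedged sketch of the standard closed-form KL(N0 || N1) between
# multivariate Gaussians; this is not the loss returned above, which (as its
# docstring notes) has numerical issues and heavily downweights its terms.
import torch

def gaussian_kl(mu0, sig0, mu1, sig1):
    k = mu0.shape[0]
    sig1_inv = torch.inverse(sig1)
    diff = mu1 - mu0
    return 0.5 * (torch.trace(sig1_inv @ sig0)
                  + diff @ sig1_inv @ diff
                  - k
                  + torch.logdet(sig1) - torch.logdet(sig0))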
def get_duality_gap(theta, S, rho):
    # rho = torch.Tensor([args.rho])
    # if USE_CUDA:
    #     rho = rho.cuda()
    U = torch.min(torch.max(torch.inverse(theta) - S, -1 * rho), rho)
    #t1 = -1*torch.log(torch.det(S+U)) - args.N # term1
    #t2 = -1*torch.log(torch.det(theta))
    t1 = -1 * get_logdet(S + U) - args.N  # term1
    t2 = -1 * get_logdet(theta)
    t3 = torch.trace(torch.matmul(S, theta))
    #t4 = args.rho*torch.max(torch.sum(torch.abs(theta), 0))  # L1 norm of mat is max abs column sum
    t4 = rho * torch.sum(torch.abs(theta))  # L1 norm of mat
    # print('DUALITY: ', t1, t2, t3, t4)
    return t1 + t2 + t3 + t4
def wasserstein_penalty_func(p, q):
    def cov(x: torch.Tensor) -> torch.Tensor:
        x = x - torch.mean(x, dim=1, keepdim=True)
        return (1. / (x.size(1) - 1)) * x.matmul(x.t())

    mean_p = p.mean(dim=1)
    cov_p = cov(p)
    mean_q = q.mean(dim=1)
    cov_q = cov(q)

    first = torch.sum(torch.pow(mean_p - mean_q, 2))
    second = cov_p.trace()
    third = cov_q.trace()
    fourth = 2 * torch.trace(torch.matmul(cov_p, cov_q).relu().sqrt())
    return first + second + third - fourth
def build_loss(self, recons, weights, raw_weights):
    size = self.X.shape[0]
    loss = 0
    loss += raw_weights * torch.log(raw_weights / recons + 10**-10)
    loss = loss.sum(dim=1)
    loss = loss.mean()
    # loss += 10**-3 * (torch.mean(self.embedding.pow(2)))
    # loss += 10**-3 * (torch.mean(self.W1.pow(2)) + torch.mean(self.W2.pow(2)))
    # loss += 10**-3 * (torch.mean(self.W1.abs()) + torch.mean(self.W2.abs()))
    degree = weights.sum(dim=1)
    L = torch.diag(degree) - weights
    loss += self.lam * torch.trace(
        self.embedding.t().matmul(L).matmul(self.embedding)) / size
    return loss
def dphidq(lam, alpha, dH, Q, dV):
    N = len(lam)
    #print("lam is {}".format(lam))
    Jm = J(lam, alpha, len(lam))
    R = torch.diag(1 / (lam * coth_torch(alpha * lam)))
    M = torch.mm(Q, torch.mm(R * Jm, torch.t(Q)))
    #print("M is {}".format(M))
    #print("dH is {}".format(dH[0,:,:]))
    #print("trace(MdH) is {}".format(torch.trace(torch.mm(M, dH[0,:,:]))))
    #print("dV is {}".format(dV))
    delta = torch.zeros(N)
    for i in range(N):
        delta[i] = 0.5 * torch.trace(torch.mm(M, dH[i, :, :])) + dV[i]
    return delta
def forward(self) -> Tuple[torch.Tensor, int]:
    """Return a value proportional to the log likelihood."""
    unstable = 0
    # model-implied covariance/mean
    sigma, mu = self.implied_sigma_mu()
    if self.fiml:
        # FIML -2 * logL (without constants)
        assert mu is not None
        loss = torch.zeros(1, dtype=mu.dtype, device=mu.device)
        mean_diffs: torch.Tensor = self.data - mu.t()
        for pairs in self.missing_patterns.values():
            # calculate sigma^-1 and logdet(sigma) in batches
            sigmas = torch.stack([
                sigma.index_select(0, x[1]).index_select(1, x[1]) for x in pairs
            ])
            sigmas_logdet = torch.logdet(sigmas)
            sigmas = torch.inverse(sigmas)
            for i, (observations, available) in enumerate(pairs):
                mean_diff = mean_diffs.index_select(
                    0, observations).index_select(1, available)
                loss_current = sigmas_logdet[i] * len(observations) + torch.trace(
                    mean_diff.mm(sigmas[i]).mm(mean_diff.t()))
                unstable += loss_current.detach().item() < 0
                loss = loss + loss_current.clamp(min=0.0)
    else:
        # maximum likelihood
        loss = torch.logdet(sigma) + torch.trace(
            self.sample_covariance.mm(torch.inverse(sigma)))
        unstable += loss.detach().item() < 0
    return loss, unstable
def evaluate(X_data):
    model.eval()
    eval_idx_list = np.arange(len(X_data), dtype="int32")
    total_loss = 0.0
    count = 0
    for idx in eval_idx_list:
        data_line = X_data[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.item()  # .item() replaces the removed .data[0]
        count += output.size(0)
    eval_loss = total_loss / count
    print("Validation/Test loss: {:.5f}".format(eval_loss))
    return eval_loss