def step(self):
    # Add weight decay
    if self.weight_decay > 0:
        for p in self.model.parameters():
            p.grad.data.add_(p.data, alpha=self.weight_decay)
    updates = {}
    for i, m in enumerate(self.modules):
        assert len(list(m.parameters())) == 1, "Can handle only one parameter at the moment"
        classname = m.__class__.__name__
        p = next(m.parameters())
        la = self.damping + self.weight_decay
        if self.steps % self.Tf == 0:
            # My asynchronous implementation exists, I will add it later.
            # Experimenting with different ways to do this in PyTorch.
            self.d_a[m], self.Q_a[m] = torch.symeig(self.m_aa[m], eigenvectors=True)
            self.d_g[m], self.Q_g[m] = torch.symeig(self.m_gg[m], eigenvectors=True)
            self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
            self.d_g[m].mul_((self.d_g[m] > 1e-6).float())
        if classname == 'Conv2d':
            p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
        else:
            p_grad_mat = p.grad.data
        v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
        v2 = v1 / (self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
        v = self.Q_g[m] @ v2 @ self.Q_a[m].t()
        v = v.view(p.grad.data.size())
        updates[p] = v
    vg_sum = 0
    for p in self.model.parameters():
        if p not in updates:
            # print("Not found in updates: %s" % p)
            continue
        v = updates[p]
        vg_sum += (v * p.grad.data * self.lr * self.lr).sum()
    nu = min(1, math.sqrt(self.kl_clip / vg_sum))
    for p in self.model.parameters():
        if p not in updates:
            # print("Not found in updates: %s" % p)
            continue
        v = updates[p]
        p.grad.data.copy_(v)
        p.grad.data.mul_(nu)
    self.optim.step()
    self.steps += 1
def sumin_pca(self, x):
    n_sample, n_feature = x.size()  # (sample, feature)
    x_cen = x - torch.mean(x, 0)
    x_cov = torch.mm(x_cen.T, x_cen) / n_sample
    evalue, evector = torch.symeig(x_cov, eigenvectors=True)  # ascending order
    # eigenvectors are the columns of `evector`; take them as rows, reverse to
    # descending eigenvalue order, and keep n_components => (n_components, n_feature)
    evector = torch.flip(evector.t(), dims=[0])[:n_components]
    vector_len = torch.norm(evector, dim=1)
    for n in range(n_components):
        evector[n] /= vector_len[n]
    component_by_sample = torch.mm(evector, x.T)
    sample_by_component = component_by_sample.T
    return sample_by_component
def __getitem__(self, index):
    sample_processed = self.all_data[index].float()
    sample_processed = sample_processed.view(-1, 3, 3)
    samples = []
    for i in range(sample_processed.shape[0]):
        s, v = torch.symeig(sample_processed[i], eigenvectors=True)
        s = torch.diag(torch.clamp(s, min=0.0001))
        samples.append(torch.matmul(v, torch.matmul(s, v.t())))
    sample_processed = torch.stack(samples)
    # sample_processed = sample_processed.reshape(1, 64, 64, 3, 3)[:, 16:48, 16:48, ...]
    sample_processed = sample_processed.reshape(1, 32, 32, 3, 3)
    # print(sample_processed)
    return sample_processed
def conv_params_pdpt(ni, no, k):
    w = conv_params(ni, no, k)
    if ni != no:
        return w
    else:
        n = no
        r = int(np.ceil(float(n) / 2))
        slices = w.view(n, n, -1).permute(2, 0, 1)
        es, Vs = [], []
        for u in slices:
            # u_sym = u.triu(diagonal=1).t() + u.triu()
            u_sym = (u + u.t()).div(2)
            # eigenvectors are used below, so request them explicitly
            e, V = torch.symeig(u_sym, eigenvectors=True)
            es.append(e[r:-1])
            Vs.append(V[:, r:-1])
        return {'e': torch.stack(es), 'V': torch.stack(Vs)}
def squared_deviation(self, y_prime, y):
    """y_prime, y: (n_atoms, 3)"""
    # ordinary cross-correlation of xyz coordinates, shape (3, 3)
    R = torch.matmul(y_prime.t(), y)
    R_parts = [torch.unbind(t) for t in torch.unbind(R)]
    F_parts = self.optimal_rotational_quaternion(R_parts)
    F = torch.tensor(F_parts)
    # backward pass is only supported in symeig
    # returned eigenvalues are in ascending order
    vals, vecs = torch.symeig(F, eigenvectors=True)
    lamx = vals[-1]  # the largest eigenvalue
    sd = torch.sum(y_prime**2 + y**2) - 2 * lamx
    return sd
def colouring(fs, fc_hat):
    # f = [C, H*W]
    ms = torch.mean(fs, dim=-1)  # [C]
    ms = ms.unsqueeze(1)  # np.reshape(ms, [-1, 1])
    fs -= ms
    covar = torch.matmul(fs, torch.transpose(fs, 0, 1))  # [C, C]
    eigenvalues, Es = torch.symeig(covar, eigenvectors=True)  # ([C], [C, C])
    eigenvalues = torch.pow(eigenvalues, 0.5)
    Dc = torch.diag(eigenvalues)  # [C, C]
    mid = torch.matmul(Es, torch.matmul(Dc, torch.transpose(Es, 0, 1)))
    return torch.matmul(mid, fc_hat) + ms
def _fit(self, x_train, y_train):
    cls_data = [
        torch.cat([x_train[j] for j in range(len(y_train)) if y_train[j] == i], 1)
        for i in set(y_train)
    ]
    sub_basis = (pca_for_sets(cls_data, self.n_sdim, self.p_norm)
                 .contiguous().permute((2, 0, 1)))
    gram_mat = (sub_basis @ sub_basis.permute((0, 2, 1))).sum(0)
    full_dim = torch.matrix_rank(gram_mat)
    _, eig_vec = torch.symeig(gram_mat, eigenvectors=True)
    eig_vec = eig_vec.flip(1)[:, self.n_reducedim:full_dim]
    self.metric_mat = eig_vec @ eig_vec.T
    self.dic = ortha_subs(self.sub_basis, self.metric_mat)
def forward(ctx, A):
    # n = int(len(S) ** 0.5)
    # A = S.reshape(n, n)
    print('A=', A)
    n = len(A)
    k = n
    if 1:
        e, v = T.symeig(-A, eigenvectors=True)
        e = -e[:k]
        v = v[:, :k]
    else:
        e, v = T.lobpcg(A, k=k)
    r = T.cat((T.flatten(v), e), 0)
    ctx.save_for_backward(e, v, A)
    print('r=', r)
    return e, v
def __expm__(self, matrix, symmetric):
    r"""Calculates the matrix exponential.

    Args:
        matrix (Tensor): Matrix to take the exponential of.
        symmetric (bool): Specifies whether the matrix is symmetric.

    :rtype: (:class:`Tensor`)
    """
    if symmetric:
        e, V = torch.symeig(matrix, eigenvectors=True)
        diff_mat = V @ torch.diag(e.exp()) @ V.t()
    else:
        diff_mat_np = expm(matrix.cpu().numpy())
        diff_mat = torch.Tensor(diff_mat_np).to(matrix.device)
    return diff_mat
def renyi_entropy(x, sigma, alpha):
    """Calculate the Renyi entropy of a single variable x (Eq. (9) in the paper).

    Args:
        x: random variable, two-dimensional (N, d).
        sigma: kernel size of x (Gaussian kernel).
        alpha: alpha value of the Renyi entropy.

    Returns:
        Renyi alpha entropy of x.
    """
    k = calculate_gram_mat(x, sigma)
    k = k / torch.trace(k)
    eigv = torch.abs(torch.symeig(k, eigenvectors=True)[0])
    eig_pow = eigv**alpha
    entropy = (1 / (1 - alpha)) * torch.log2(torch.sum(eig_pow))
    return entropy
def forward(ctx, input, eps=1e-2):
    use_cuda = input.is_cuda
    if input.shape[0] < 300:
        input = input.cpu()
    e, v = torch.symeig(input, eigenvectors=True)
    if use_cuda and not e.is_cuda:
        e = e.cuda()
        v = v.cuda()
    e = e.clamp(min=0)
    e_sqrt = e.sqrt_().add_(eps)
    ctx.e_sqrt = e_sqrt
    ctx.v = v
    e_rsqrt = e_sqrt.reciprocal()
    output = v.mm(torch.diag(e_rsqrt).mm(v.t()))
    return output
def calc_style_desc(activations: torch.Tensor, take_root: bool = False):
    """Get the style description tensors from an activation tensor.

    Arguments:
        activations {torch.Tensor} -- Activation feature map from VGG
        take_root {bool} -- Whether to return the root of the covariance
    """
    mu, cov = calc_2_moments(activations)
    eigvals, eigvects = torch.symeig(cov, eigenvectors=True)
    tr_cov = torch.sum(eigvals)
    if take_root:
        eigroot_mat = torch.diag(torch.sqrt(torch.clamp(eigvals, 0)))
        root_cov = eigvects.mm(eigroot_mat).mm(eigvects.t())
        return mu, tr_cov, root_cov
    return mu, tr_cov, cov
def _generate_gauss_cov():
    from torch.distributions import Bernoulli
    from torch.distributions import Uniform
    ones = torch.ones((len(theta), len(theta)), device=theta.device)
    prec = (Bernoulli(0.1 * ones).sample()
            * Uniform(0.4 * ones, 0.8 * ones).sample())
    for ii in range(len(theta)):
        for jj in range(ii + 1, len(theta)):
            prec[ii, jj] = prec[jj, ii]
    prec = prec + (torch.symeig(prec)[0].min().abs() + 0.05) * torch.eye(
        len(theta), device=theta.device)
    return torch.inverse(prec)
def low_pass_filter_direct(x, adj, filter_):
    nodes_to_keep = np.where(adj.sum(axis=1) > 0)[0]
    adj = adj.todense()
    np.fill_diagonal(adj, 0)
    real_adj = adj[nodes_to_keep]
    real_adj = real_adj[:, nodes_to_keep]
    x = x[nodes_to_keep]
    laplacian = torch.cuda.FloatTensor(
        normalization.normalized_laplacian(real_adj).todense())
    _, eigenVectors = torch.symeig(laplacian, eigenvectors=True)
    x = torch.matmul(eigenVectors.T, x)
    x = filter_[:len(nodes_to_keep)] * x
    x = torch.matmul(eigenVectors, x)
    return x
def train(cls, vectors, processing_instruction, device):
    """Train the vector processor.

    Arguments:
        vectors {Tensor} -- Row-wise vectors used to estimate the processing statistics.
        processing_instruction {String} -- Contains characters 'c', 'w', 'l'.
            For example 'cwlc' performs centering, whitening, length
            normalization, and centering (a second time) in this order.
        device -- Torch device on which to perform the computation.
    """
    print('Training vector processor ...')
    c_count = processing_instruction.count('c')
    w_count = processing_instruction.count('w')
    vec_size = vectors.size()[1]
    whitening_matrices = torch.zeros(w_count, vec_size, vec_size, device=device)
    centering_vectors = torch.zeros(c_count, vec_size, device=device)
    vectors = vectors.to(device)
    c_count = 0
    w_count = 0
    for c in processing_instruction:
        if c == 'c':
            print('Centering...')
            centering_vectors[c_count, :] = torch.mean(vectors, dim=0)
            vectors = vectors - centering_vectors[c_count, :]
            c_count += 1
        elif c == 'w':
            print('Whitening...')
            l, U = torch.symeig(
                torch.matmul(vectors.t(), vectors) / vectors.size()[0],
                eigenvectors=True)
            l = torch.clamp(l, min=1e-10)
            whitening_matrices[w_count, :, :] = torch.rsqrt(l) * U  # transposed
            vectors = torch.matmul(vectors, whitening_matrices[w_count, :, :])
            w_count += 1
        elif c == 'l':
            print('Normalizing length...')
            vectors = unit_len_norm(vectors)
    return VectorProcessor(centering_vectors, whitening_matrices,
                           processing_instruction)
def _update_estimated_hessian(self):
    m = self.update_counter
    for group in self.param_groups:
        w_var = group['W_var'].to(self.device)
        lam = group['lam'].to(self.device)
        alph = group['alpha'].to(self.device)
        for p in group['params']:
            state = self.state[p]
            S = state['S']
            X = state['X']
            Y = state['Y']
            hv = state['accumulated_hess_vec']
            STWS = state['STWS']
            STLS = state['STLS']
            ip = state['inner_product']
            vec = state['vec']
            S_norm = torch.norm(vec)**2
            S.data[..., m] = (vec / torch.sqrt(S_norm)).data
            Y.data[..., m] = (hv / torch.sqrt(S_norm)).data
            delta = (Y[..., :m + 1] - alph * S[..., :m + 1]).view(-1, m + 1)
            STWS.data[:m, m] = w_var * torch.mv(
                S[..., :m].view(-1, m).t(), S[..., m].view(-1))
            STWS.data[m, :m] = STWS[:m, m]
            STWS.data[m, m] = w_var * S_norm
            STLS.data[m] = lam * S_norm
            stls = torch.sqrt(STLS[:m + 1])
            # D: eigenvalues, V: eigenvectors
            D, V = torch.symeig(
                (STWS[:m + 1, :m + 1] / stls) / stls.unsqueeze(1),
                eigenvectors=True)
            F_V = V / stls.unsqueeze(1)
            X.data[:, :m + 1] = torch.mm(
                (torch.mm(delta, F_V) / torch.sqrt(lam)) / (w_var / lam * D + 1.0),
                F_V.t()) / torch.sqrt(lam)
            ip[:m + 1, :m + 1] = alph * w_var**2 * torch.mm(
                X[:, :m + 1].t(), S[..., :m + 1].view(-1, m + 1))
    self.update_counter += 1
def _prox(self, metric_mat, _x_train, eta):
    # TODO: move this function to utilities
    eig_val, eig_vec = torch.symeig(metric_mat, True)
    med = eig_val.median() * 0.8
    if med < eta:
        eig_val = torch.relu(eig_val - med)
        eta *= med
    else:
        eig_val = torch.relu(eig_val - eta)
    space_dim = (eig_val > 1e-8).sum()
    s_dim = eig_val.shape[0] - space_dim
    new_metric = (eig_vec[:, s_dim:] @ eig_val[s_dim:].diag()
                  @ eig_vec[:, s_dim:].T)
    space_dim = torch.matrix_rank(new_metric)
    if space_dim != new_metric.shape[0]:
        _x_data = lp_normalize(self.reddim_mat @ torch.cat(_x_train, 1),
                               self.p_norm)
        autocorr_mat = new_metric @ _x_data @ _x_data.T
        d, sing_vec = autocorr_mat.cpu().eig(True)
        d = d.to(autocorr_mat)
        sing_vec = sing_vec.to(autocorr_mat)
        sing_vec = sing_vec * d[:, 0]
        sing_vec, _, _ = sing_vec.svd()
        proj_basis = sing_vec[:, :space_dim].T
        for _ in range(10):
            _tmp_new_metric = proj_basis @ new_metric @ proj_basis.T
            _rank = torch.matrix_rank(_tmp_new_metric)
            if _rank < space_dim:
                space_dim = _rank
                proj_basis = sing_vec[:, :space_dim].T
                _tmp_new_metric = proj_basis @ new_metric @ proj_basis.T
            else:
                break
        new_metric = _tmp_new_metric
        _reddim_mat = proj_basis @ self.reddim_mat
        _red_subs = proj_basis @ self.sub_basis.permute((2, 0, 1))
        sub_basis, _ = torch.qr(_red_subs)
        sub_basis = sub_basis.to(_red_subs.device).permute((1, 2, 0))
    else:
        _reddim_mat = self.reddim_mat
        sub_basis = self.sub_basis
    return new_metric, _reddim_mat, sub_basis, eta
def fista(X, D, lambd, max_iter=250, tol=1e-4, verbose=False,
          return_history=False):
    n_ex, n_feat = X.shape
    n_act = D.shape[0]
    gram = D.t().mm(D)
    L = 2. * torch.symeig(gram)[0][-1]
    zero = torch.tensor(0., dtype=X.dtype, device=X.device)
    yt = torch.zeros((n_ex, n_act), dtype=X.dtype, device=X.device)
    xtm = torch.zeros_like(yt)
    if return_history:
        yth = torch.zeros((max_iter, n_ex, n_act), dtype=X.dtype, device=X.device)
    t = 1.
    se = torch.sum((X)**2, dim=1).mean().detach().cpu().numpy()
    seo = se
    for ii in range(max_iter):
        diff = X - yt.mm(D)
        sep = (torch.sum((diff)**2, dim=1).mean()
               + lambd * abs(yt).sum(dim=1).mean())
        sep = sep.detach().cpu().numpy()
        if (se - sep) / max(1., max(abs(se), abs(sep))) < tol:
            if ii > 0:
                break
        se = sep
        grad = -(diff).mm(D.t())
        xt = yt - grad / L
        xt = torch.max(abs(xt) - lambd / L, zero) * torch.sign(xt)
        t = 0.5 * (1. + np.sqrt(1. + 4 * t**2))
        yt = xt + (t - 1.) * (xt - xtm) / t
        xtm = xt
        if return_history:
            yth[ii] = yt
        if verbose:
            print('inference', ii, seo, sep)
    if return_history:
        return yth[:ii]
    else:
        return yt
def stn(self, x, u):
    # A_vec = self.fc_stn(u)
    A_vec = self.fc_stn(torch.cat([u, x.reshape(-1, 28 * 28)], 1))
    A = convert_Avec_to_A(A_vec)
    _, evs = torch.symeig(A, eigenvectors=True)
    tcos, tsin = evs[:, 0:1, 0:1], evs[:, 1:2, 0:1]
    self.theta_angle = torch.atan2(tsin[:, 0, 0], tcos[:, 0, 0])
    # rotate clockwise by theta
    theta_0 = torch.cat([tcos, tsin, tcos * 0], 2)
    theta_1 = torch.cat([-tsin, tcos, tcos * 0], 2)
    theta = torch.cat([theta_0, theta_1], 1)
    grid = F.affine_grid(theta, x.size())
    x = F.grid_sample(x, grid)
    return x
def init_teacher_(model_emb, teacher_emb):
    assert model_emb.size(0) == teacher_emb.size(0), \
        "Vocabulary sizes are different."
    if model_emb.size(1) == teacher_emb.size(1):
        model_emb.copy_(teacher_emb.to(model_emb.device))
    elif model_emb.size(1) > teacher_emb.size(1):
        model_emb.narrow(1, 0, teacher_emb.size(1)).copy_(
            teacher_emb.to(model_emb.device))
    else:
        teacher_emb = teacher_emb.to(get_device())
        print(teacher_emb.size())
        print(torch.matmul(teacher_emb.t(), teacher_emb).size())
        _, phi = torch.symeig(torch.matmul(teacher_emb.t(), teacher_emb),
                              eigenvectors=True)
        model_emb.copy_(
            torch.matmul(teacher_emb,
                         phi[:, :model_emb.size(1)]).to(model_emb.device))
def symsqrt(a, cond=None, return_rank=False):
    """Computes the symmetric square root of a positive definite matrix."""
    s, u = torch.symeig(a, eigenvectors=True)
    cond_dict = {
        torch.float32: 1e3 * 1.1920929e-07,
        torch.float64: 1e6 * 2.220446049250313e-16,
    }
    if cond in [None, -1]:
        cond = cond_dict[a.dtype]
    above_cutoff = (abs(s) > cond * torch.max(abs(s)))
    psigma_diag = torch.sqrt(s[above_cutoff])
    u = u[:, above_cutoff]
    B = u @ torch.diag(psigma_diag) @ u.t()
    if return_rank:
        return B, len(psigma_diag)
    else:
        return B
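# A minimal usage sketch for symsqrt above (illustrative, not part of the
# original snippet): build a random symmetric positive definite matrix and
# check that the returned root squares back to the input. It assumes a PyTorch
# version that still ships torch.symeig.
import torch

torch.manual_seed(0)
x = torch.randn(5, 5, dtype=torch.float64)
a = x @ x.t() + 1e-3 * torch.eye(5, dtype=torch.float64)  # SPD by construction
b, rank = symsqrt(a, return_rank=True)
assert rank == 5                              # full rank, nothing cut off
assert torch.allclose(b @ b, a, atol=1e-8)    # B @ B recovers the input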
def lyapunov_svd(A, C, rtol=1e-4, use_svd=False):
    """Solve AX + XA = C."""
    assert A.shape[0] == A.shape[1]
    assert len(A.shape) == 2
    if use_svd:
        U, S, V = torch.svd(A)
    else:
        S, U = torch.symeig(A, eigenvectors=True)
    S = S.diag() @ torch.ones(A.shape)
    X = U @ ((U.t() @ C @ U) / (S + S.t())) @ U.t()
    error = A @ X + X @ A - C
    relative_error = torch.max(torch.abs(error)) / torch.max(torch.abs(A))
    if relative_error > rtol:
        print(f"Warning, error {relative_error} encountered in lyapunov_svd")
    return X
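# Hedged usage sketch for lyapunov_svd above (illustrative, not from the
# original snippet): solve A X + X A = C for a random symmetric positive
# definite A and a symmetric C, then confirm the residual is small. The SPD
# shift keeps every eigenvalue sum s_i + s_j away from zero. Assumes a PyTorch
# version where torch.symeig is still available.
import torch

torch.manual_seed(0)
m = torch.randn(4, 4)
A_spd = m @ m.t() + 4 * torch.eye(4)   # SPD left-hand operator
C_sym = torch.randn(4, 4)
C_sym = (C_sym + C_sym.t()) / 2        # symmetric right-hand side
X_sol = lyapunov_svd(A_spd, C_sym)
residual = A_spd @ X_sol + X_sol @ A_spd - C_sym
print(torch.max(torch.abs(residual)))  # should be near machine precision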
def fit(self, images):
    # Compute the transformation matrix and the mean from the data
    x = images[0][0].reshape(1, -1)
    self.mean = torch.zeros([1, x.size()[1]]).to(self.device)
    con_matrix = torch.zeros([x.size()[1], x.size()[1]]).to(self.device)
    for i in range(len(images)):
        # Accumulate the mean over each sample
        x = images[i][0].reshape(1, -1).to(self.device)
        self.mean += x / len(images)
        con_matrix += torch.mm(x.t(), x) / len(images)
        if i % 10000 == 0:
            print("{0}/{1}".format(i, len(images)))
    self.E, self.V = torch.symeig(con_matrix, eigenvectors=True)  # eigendecomposition
    self.E = torch.max(self.E, torch.zeros_like(self.E))  # clamp values driven negative by numerical error
    self.ZCA_matrix = torch.mm(
        torch.mm(self.V, torch.diag((self.E.squeeze() + self.epsilon)**(-0.5))),
        self.V.t())
    print("completed!")
def test_minimize(dtype, device, clss):
    torch.manual_seed(400)
    random.seed(100)
    nr = 3
    nbatch = 2
    A = torch.nn.Parameter((torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization line, so set it undifferentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    fwd_options = {
        "method": "broyden1",
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -0.5,
    }
    activation = "square"  # square activation makes it easy to optimize

    model = clss(A, addx=False, activation=activation, sumoutput=True)
    model.set_diag_bias(diag, bias)
    y = minimize(model.forward, y0, **fwd_options)

    # check the grad (must be close to 0)
    with torch.enable_grad():
        y1 = y.clone().requires_grad_()
        f = model.forward(y1)
        grady, = torch.autograd.grad(f, (y1,))
    assert torch.allclose(grady, grady * 0)

    # check the hessian (must be posdef)
    h = hess(model.forward, (y1,), idxs=0).fullmatrix()
    eigval, _ = torch.symeig(h)
    assert torch.all(eigval >= 0)

    def getloss(A, y0, diag, bias):
        model = clss(A, addx=False, activation=activation, sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward, y0, **fwd_options)
        return y

    gradcheck(getloss, (A, y0, diag, bias))
    gradgradcheck(getloss, (A, y0, diag, bias))
def test_minimize_methods(dtype, device):
    torch.manual_seed(400)
    random.seed(100)
    dtype = torch.float64
    nr = 3
    nbatch = 2
    default_fwd_options = {
        "max_niter": 50,
        "f_tol": 1e-9,
        "alpha": -0.5,
    }
    # list the methods and the options here
    methods_and_options = {
        "broyden1": default_fwd_options,
    }
    A = torch.nn.Parameter((torch.randn((nr, nr)) * 0.5).to(dtype).requires_grad_())
    diag = torch.nn.Parameter(torch.randn((nbatch, nr)).to(dtype).requires_grad_())
    # bias will be detached from the optimization line, so set it undifferentiable
    bias = torch.zeros((nbatch, nr)).to(dtype)
    y0 = torch.randn((nbatch, nr)).to(dtype)
    activation = "square"  # square activation makes it easy to optimize

    for method in methods_and_options:
        fwd_options = {**methods_and_options[method], "method": method}
        model = DummyModule(A, addx=False, activation=activation, sumoutput=True)
        model.set_diag_bias(diag, bias)
        y = minimize(model.forward, y0, **fwd_options)

        # check the grad (must be close to 0)
        with torch.enable_grad():
            y1 = y.clone().requires_grad_()
            f = model.forward(y1)
            grady, = torch.autograd.grad(f, (y1,))
        assert torch.allclose(grady, grady * 0)

        # check the hessian (must be posdef)
        h = hess(model.forward, (y1,), idxs=0).fullmatrix()
        eigval, _ = torch.symeig(h)
        assert torch.all(eigval >= 0)
def nearestPDHack(c):
    """This probably kind of works but is really slow."""
    eps = torch.finfo(c.dtype).eps
    k = 1
    while not isPD(c):
        # fix it so we have a positive definite matrix
        # could also use the Higham algorithm for more accuracy:
        # N.J. Higham, "Computing a nearest symmetric positive semidefinite matrix"
        # https://gist.github.com/fasiha/fdb5cec2054e6f1c6ae35476045a0bbd
        print('covariance matrix not positive definite, attempting recovery')
        e, v = torch.symeig(c, eigenvectors=True)
        bump = eps * k**2
        e[e < bump] += bump
        c = torch.matmul(v, torch.matmul(e.diag_embed(), v.t()))
        k += 1
    return c
def chen_estimate(im, pch_size=8):
    im = torch.squeeze(im)  # grayscale
    im = im.unsqueeze(0)
    pch = im2patch(im, pch_size, 3)
    num_pch = pch.size()[3]
    pch = pch.view((-1, num_pch))
    d = pch.size()[0]
    mu = torch.mean(pch, dim=1, keepdim=True)
    X = pch - mu
    sigma_X = torch.matmul(X, torch.t(X)) / num_pch
    sig_value, _ = torch.symeig(sigma_X, eigenvectors=True)
    sig_value = sig_value.sort().values
    start = time.time()

    # Tensor operations replacing the iterative step below.
    # They allow parallel computation, which is more efficient.
    triangle = torch.ones((d, d))
    triangle = torch.tril(triangle).cuda()
    sig_matrix = torch.matmul(triangle, torch.diag(sig_value))

    # calculate all threshold values at once
    num_vec = torch.arange(d) + 1
    num_vec = num_vec.to(dtype=torch.float32).cuda()
    sum_arr = torch.sum(sig_matrix, dim=1)
    tau_arr = sum_arr / num_vec
    tau_mat = torch.matmul(torch.diag(tau_arr), triangle)

    # find the median value with a masking scheme:
    big_bool = torch.sum(sig_matrix > tau_mat, axis=1)
    small_bool = torch.sum(sig_matrix < tau_mat, axis=1)
    mask = (big_bool == small_bool).to(dtype=torch.float32).cuda()
    tau_chen = torch.max(mask * tau_arr)

    # Previous implementation
    # for ii in range(-1, -d-1, -1):
    #     tau = torch.mean(sig_value[:ii])
    #     if torch.sum(sig_value[:ii] > tau) == torch.sum(sig_value[:ii] < tau):
    #         return torch.sqrt(tau)
    # print('old: ', torch.sqrt(tau))
    return torch.sqrt(tau_chen)
def whitening(fc):
    # f = [C, H*W]
    mc = torch.mean(fc, dim=-1)  # [C]
    mc = mc.unsqueeze(1)  # .reshape(mc, [-1, 1])
    fc -= mc
    covar = torch.matmul(fc, torch.transpose(fc, 0, 1))  # [C, C]
    eigenvalues, Ec = torch.symeig(
        covar, eigenvectors=True)  # np.linalg.eigh(covar) -> ([C], [C, C])
    eigenvalues = torch.pow(eigenvalues, -0.5)
    Dc = torch.diag(eigenvalues)  # [C, C]
    mid = torch.matmul(Ec, torch.matmul(Dc, torch.transpose(Ec, 0, 1)))
    return torch.matmul(mid, fc)  # [C, H*W]
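# Hedged sketch combining whitening above with colouring (defined earlier in
# this collection), in the spirit of whiten-and-colour feature transforms. The
# feature maps here are random stand-ins; real use would pass encoder
# activations reshaped to [C, H*W]. Both functions modify their first argument
# in place, hence the clones. Assumes torch.symeig is available.
import torch

torch.manual_seed(0)
C, HW = 8, 256
fc = torch.randn(C, HW)               # "content" features
fs = torch.randn(C, HW)               # "style" features
fc_hat = whitening(fc.clone())        # whitened content, identity covariance
fcs = colouring(fs.clone(), fc_hat)   # recoloured with the style statistics
print(fcs.shape)                      # torch.Size([8, 256])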
def split(self, prior_count=1.):
    '''Split the distribution into two Normal distributions (of the same
    type) by moving their means by +/- one standard deviation.

    Args:
        prior_count (float): Prior count to reset the distribution.

    Returns:
        ``Normal``: First Normal distribution.
        ``Normal``: Second Normal distribution.
    '''
    evals, evecs = torch.symeig(self.cov, eigenvectors=True)
    mean1 = self.mean + evecs.t() @ torch.sqrt(evals)
    mean2 = self.mean - evecs.t() @ torch.sqrt(evals)
    return self.create(mean1, self.cov, self.count), \
        self.create(mean2, self.cov, self.count)
def test_symeig(self):
    lazy_tensor = self.create_lazy_tensor().detach().requires_grad_(True)
    lazy_tensor_copy = lazy_tensor.clone().detach().requires_grad_(True)
    evaluated = self.evaluate_lazy_tensor(lazy_tensor_copy)

    # Perform forward pass
    evals_unsorted, evecs_unsorted = lazy_tensor.symeig(eigenvectors=True)
    evecs_unsorted = evecs_unsorted.evaluate()

    # since LazyTensor.symeig does not sort evals, we do this here for the check
    evals, idxr = torch.sort(evals_unsorted, dim=-1, descending=False)
    evecs = torch.gather(evecs_unsorted, dim=-1,
                         index=idxr.unsqueeze(-2).expand(evecs_unsorted.shape))

    evals_actual, evecs_actual = torch.symeig(evaluated.double(), eigenvectors=True)
    evals_actual = evals_actual.to(dtype=evaluated.dtype)
    evecs_actual = evecs_actual.to(dtype=evaluated.dtype)

    # Check forward pass
    self.assertAllClose(evals, evals_actual, **self.tolerances["symeig"])
    lt_from_eigendecomp = evecs @ torch.diag_embed(evals) @ evecs.transpose(-1, -2)
    self.assertAllClose(lt_from_eigendecomp, evaluated, **self.tolerances["symeig"])

    # if there are repeated evals, we'll skip checking the eigenvectors for those
    any_evals_repeated = False
    evecs_abs, evecs_actual_abs = evecs.abs(), evecs_actual.abs()
    for idx in itertools.product(*[range(b) for b in evals_actual.shape[:-1]]):
        eval_i = evals_actual[idx]
        # detach to avoid pytorch/pytorch#41389
        if torch.unique(eval_i.detach()).shape[-1] == eval_i.shape[-1]:
            self.assertAllClose(evecs_abs[idx], evecs_actual_abs[idx],
                                **self.tolerances["symeig"])
        else:
            any_evals_repeated = True

    # Perform backward pass
    symeig_grad = torch.randn_like(evals)
    ((evals * symeig_grad).sum()).backward()
    ((evals_actual * symeig_grad).sum()).backward()

    # Check grads if there were no repeated evals
    if not any_evals_repeated:
        for arg, arg_copy in zip(lazy_tensor.representation(),
                                 lazy_tensor_copy.representation()):
            if arg_copy.requires_grad and arg_copy.is_leaf and arg_copy.grad is not None:
                self.assertAllClose(arg.grad, arg_copy.grad,
                                    **self.tolerances["symeig"])

    # Test with eigenvectors=False
    _, evecs = lazy_tensor.symeig(eigenvectors=False)
    self.assertIsNone(evecs)
def M_step(self):
    self.pi = torch.mean(self.t, 1)
    for k in range(self.K):
        tk = self.t[k, :]
        stack = self.y[tk > self.eps, :]
        tk = tk[tk > self.eps]
        tk = tk / torch.sum(tk)
        self.mu[:, k] = torch.mean(tk.unsqueeze(1) * stack, 0)
        centeredstack = stack - self.mu[:, k]
        Sk = torch.matmul(centeredstack.transpose(1, 0),
                          tk.unsqueeze(1) * centeredstack)
        ev, v = torch.symeig(Sk, eigenvectors=True)
        meanev = torch.cumsum(ev, 0) / torch.arange(1, self.p + 1,
                                                    device=self.ev.device)
        self.d[k] = torch.sum((meanev - self.b) > 0)
        self.ev[k, :] = ev
        self.Q[k] = v[:, int(self.p - self.d[k]):]
def __init__(
    self,
    mean: Tensor,
    cov: Tensor,
    seed: Optional[int] = None,
    inv_transform: bool = False,
) -> None:
    r"""Engine for qMC sampling from a multivariate Normal `N(\mu, \Sigma)`.

    Args:
        mean: The mean vector.
        cov: The covariance matrix.
        seed: The seed with which to seed the random number generator of the
            underlying SobolEngine.
        inv_transform: If True, use inverse transform instead of Box-Muller.
    """
    # validate inputs
    if not cov.shape[0] == cov.shape[1]:
        raise ValueError("Covariance matrix is not square.")
    if not mean.shape[0] == cov.shape[0]:
        raise ValueError("Dimension mismatch between mean and covariance.")
    if not torch.allclose(cov, cov.transpose(-1, -2)):
        raise ValueError("Covariance matrix is not symmetric.")
    self._mean = mean
    self._normal_engine = NormalQMCEngine(
        d=mean.shape[0], seed=seed, inv_transform=inv_transform
    )
    # compute Cholesky decomp; if it fails, do the eigendecomposition
    try:
        self._corr_matrix = torch.cholesky(cov).transpose(-1, -2)
    except RuntimeError:
        eigval, eigvec = torch.symeig(cov, eigenvectors=True)
        if not torch.all(eigval >= -1e-8):
            raise ValueError("Covariance matrix not PSD.")
        eigval_root = eigval.clamp_min(0.0).sqrt()
        self._corr_matrix = (eigvec * eigval_root).transpose(-1, -2)