import time

import torch


def Sever_CG(actor_loss_grad, actor_grad_logp, n, nsteps=10, r=4, p=0.05):
    search_dir = None
    indices = list(range(n))
    for i in range(r):
        start_time = time.time()
        search_dir = conjugate_gradient_sever(
            actor_grad_logp[indices],
            actor_loss_grad[indices].mean(dim=0),
            x=search_dir,
            nsteps=nsteps)
        # print("--- conjugate_gradient_sever: %s seconds ---" % (time.time() - start_time))

        # Per-sample residuals Fx - g, centered across the sample dimension.
        grads = actor_grad_logp[indices] * torch.mv(
            actor_grad_logp[indices],
            search_dir).unsqueeze(dim=1) - actor_loss_grad[indices]  ## Fx-g
        grads = grads - grads.mean(dim=-2, keepdim=True)

        start_time = time.time()
        u, s, v = torch.svd_lowrank(grads)
        # print("--- svd time: %s seconds ---" % (time.time() - start_time))

        start_time = time.time()
        # Score each sample by its squared projection onto the top right
        # singular vector, then drop the top p fraction as outliers.
        top_right_eigenvector = v[:, 0]
        outlier_score = torch.mv(grads, top_right_eigenvector)**2
        _, topk_index = torch.topk(outlier_score, k=round(n * p))
        # Delete from the back so earlier list positions stay valid.
        for index in sorted(topk_index.tolist(), reverse=True):
            del indices[index]
        # print("--- time after svd: %s seconds ---" % (time.time() - start_time))
    return search_dir, indices
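# The filtering step at the heart of Sever_CG can be exercised on its own:
# center the per-sample gradients, project them onto the top right singular
# vector, and drop the highest-scoring rows. A minimal self-contained sketch;
# the random `grads` matrix and the planted outliers are illustrative only.
import torch

torch.manual_seed(0)
n, d, p = 200, 32, 0.05
grads = torch.randn(n, d)
grads[:10] += 8.0 * torch.randn(d)            # ten corrupted rows

grads = grads - grads.mean(dim=-2, keepdim=True)
_, _, v = torch.svd_lowrank(grads)
scores = torch.mv(grads, v[:, 0])**2          # squared projection per sample
_, drop = torch.topk(scores, k=round(n * p))
print(sorted(drop.tolist()))                  # concentrates on the first ten rows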
import numpy as np
import torch


def get_jacobian_svd(train_loader, net, batchsize, num_classes,
                     weights=None, device=None):
    log("Computing jacobian")
    slices = get_slices(net)
    jac = get_jacobian(train_loader, net, batchsize, num_classes, device)
    if weights is not None:
        weighted_jacobian(jac, slices, weights)
    log("Computing SVD of jacobian with shape {}".format(jac.shape))
    U, D, V = torch.svd_lowrank(jac, q=len(jac))
    # For every right singular vector, record how much of its norm falls
    # inside each per-layer parameter slice.
    splitted_norms = np.zeros([len(slices), len(D)])
    for i, v in enumerate(V.T):
        for j, (ind_s, ind_e) in enumerate(slices):
            splitted_norms[j][i] = torch.norm(v[ind_s:ind_e])
    return D.cpu().numpy(), splitted_norms, slices, net.layer_names
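# The slice bookkeeping above is easy to check in isolation: take a random
# "Jacobian", split each right singular vector by hypothetical per-layer
# parameter ranges, and measure the per-slice norms. Illustrative shapes only.
import numpy as np
import torch

torch.manual_seed(0)
jac = torch.randn(8, 50)                      # 8 samples x 50 parameters
slices = [(0, 20), (20, 35), (35, 50)]        # assumed layer boundaries

U, D, V = torch.svd_lowrank(jac, q=len(jac))  # q = number of rows
norms = np.zeros([len(slices), len(D)])
for i, v in enumerate(V.T):
    for j, (s, e) in enumerate(slices):
        norms[j][i] = torch.norm(v[s:e])      # share of vector i in "layer" j
print(norms.round(2))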
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)
    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[2]
    dataset_config['path'] = dataset_config['path'][:-4] + str(1)

    dataset = get_dataset(dataset_config)
    adj = generate_daj_mat(dataset)
    part_adj = adj[:dataset.n_users, dataset.n_users:]
    part_adj_tensor = get_sparse_tensor(part_adj, 'cpu')
    with torch.no_grad():
        u, s, v = torch.svd_lowrank(part_adj_tensor, 64)

    sort_ranked_users, sort_ranked_items = graph_rank_nodes(dataset, 'sort')
    degree_ranked_users, degree_ranked_items = graph_rank_nodes(dataset, 'degree')
    pr_ranked_users, pr_ranked_items = graph_rank_nodes(dataset, 'page_rank')
    ranked_users = (sort_ranked_users, degree_ranked_users, pr_ranked_users)
    ranked_items = (sort_ranked_items, degree_ranked_items, pr_ranked_items)

    pdf = PdfPages('figure_5.pdf')
    fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True, figsize=(11, 4))
    axes = ax.flatten()
    plot_error(part_adj, u.cpu().numpy(), ranked_users, axes[0], device, 'users')
    plot_error(part_adj.T, v.cpu().numpy(), ranked_items, axes[1], device, 'items')
    pdf.savefig()
    plt.close(fig)
    pdf.close()
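# torch.svd_lowrank works directly on a sparse matrix, which is what makes the
# rank-64 decomposition of the user-item adjacency block above tractable. A
# small sketch with a random sparse matrix standing in for part_adj_tensor.
import torch

torch.manual_seed(0)
idx = torch.randint(0, 500, (2, 2000))          # random (user, item) pairs
vals = torch.ones(idx.shape[1])
sparse_adj = torch.sparse_coo_tensor(idx, vals, (500, 500)).coalesce()

with torch.no_grad():
    u, s, v = torch.svd_lowrank(sparse_adj, 64)
print(u.shape, s.shape, v.shape)                # (500, 64), (64,), (500, 64)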
def eigenthings_tensor_utils(t, device=None, out_device='cpu', symmetric=False, topn=-1):
    t = t.to(device)
    if topn >= 0:
        _, eigenvals, eigenvecs = torch.svd_lowrank(
            t, q=min(topn, t.size()[0], t.size()[1]))
        eigenvecs.transpose_(0, 1)
    else:
        if symmetric:
            eigenvals, eigenvecs = torch.symeig(t, eigenvectors=True)  # pylint: disable=no-member
            eigenvals = eigenvals.flip(0)
            eigenvecs = eigenvecs.transpose(0, 1).flip(0)
        else:
            _, eigenvals, eigenvecs = torch.svd(t, compute_uv=True)  # pylint: disable=no-member
            eigenvecs = eigenvecs.transpose(0, 1)
    return eigenvals, eigenvecs
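# torch.symeig and torch.svd were deprecated and later removed in favor of the
# torch.linalg namespace. A hedged sketch of the same routine against the newer
# API; behavior is intended to match, but the function name here is ours.
import torch

def eigenthings_tensor_utils_modern(t, symmetric=False, topn=-1):
    if topn >= 0:
        _, vals, v = torch.svd_lowrank(t, q=min(topn, t.size(0), t.size(1)))
        vecs = v.transpose(0, 1)
    elif symmetric:
        vals, vecs = torch.linalg.eigh(t)      # ascending eigenvalues
        vals = vals.flip(0)                    # flip to descending, as before
        vecs = vecs.transpose(0, 1).flip(0)    # rows are eigenvectors
    else:
        _, vals, vh = torch.linalg.svd(t)      # singular values already descend
        vecs = vh                              # rows are right singular vectors
    return vals, vecs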
import torch
from torch import Tensor


def approximate_sim(src: Tensor, mapping: Tensor, trg: Tensor,
                    rank=1000, niter=2, keep_k=100, batch_size=5000):
    srclen = src.size(-1)
    trglen = trg.size(-1)
    tgm = spspmm(src.t(), mapping)
    # Low-rank factors: us [N, R], ss [R], vs [R, T]
    us, ss, vs = torch.svd_lowrank(tgm, rank, niter)
    vs = vs.t()
    print('calculate svd complete')
    result = None

    def merge(a, b):
        if a is None:
            return b
        return torch.sparse_coo_tensor(
            torch.cat([a._indices(), b._indices()], 1),
            torch.cat([a._values(), b._values()], 0),
            [a.size(0) + b.size(0), a.size(1)])

    for i_batch in range(0, srclen, batch_size):
        i_end = min(i_batch + batch_size, srclen)
        # Densify only one row block at a time to save GPU memory.
        batched_tgm = us[i_batch:i_end].mm(torch.diag(ss)).mm(vs)
        val, ind = batched_tgm.topk(dim=-1, k=keep_k)
        batched_tgm = topk2spmat(val, ind, batched_tgm.size(), 0,
                                 batched_tgm.device)
        batched_tgm = spspmm(batched_tgm, trg)
        result = merge(result, batched_tgm)
        if i_batch % (10 * batch_size) == 0:
            print('batch', i_batch, 'complete, result size',
                  result._values().size())
    return result
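# The batched reconstruction us[i:j] @ diag(ss) @ vs above is just the low-rank
# factorization applied one row block at a time. A self-contained check, with
# illustrative shapes, that a block matches the corresponding full-product rows.
import torch

torch.manual_seed(0)
A = torch.randn(1000, 400)
us, ss, vs = torch.svd_lowrank(A, q=50, niter=2)
vs = vs.t()                                      # [R, T], as in approximate_sim

block = us[:128].mm(torch.diag(ss)).mm(vs)       # rows 0..127 only
full = us.mm(torch.diag(ss)).mm(vs)
print(torch.allclose(block, full[:128]))         # True: same rows, less memory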
def mds_torch(pre_dist_mat, weights=None, iters=10, tol=1e-5, eigen=False, verbose=2):
    """ Gets distance matrix. Outputs 3d. See below for wrapper.
        Assumes (for now) distogram is (N x N) and symmetric.
        Outs:
        * best_3d_coords: (batch x 3 x N)
        * historic_stresses: (batch x steps)
    """
    device, dtype = pre_dist_mat.device, pre_dist_mat.type()
    # ensure batched MDS
    pre_dist_mat = expand_dims_to(pre_dist_mat, length=(3 - len(pre_dist_mat.shape)))
    # start
    batch, N, _ = pre_dist_mat.shape
    diag_idxs = np.arange(N)
    his = [torch.tensor([np.inf] * batch, device=device)]

    # initialize by eigendecomposition: https://www.lptmc.jussieu.fr/user/lesne/bioinformatics.pdf
    # follow : https://www.biorxiv.org/content/10.1101/2020.11.27.401232v1.full.pdf
    D = pre_dist_mat**2
    M = 0.5 * (D[:, :1, :] + D[:, :, :1] - D)
    # do loop svd bc it's faster: (2-3x in CPU and 1-2x in GPU)
    # https://discuss.pytorch.org/t/batched-svd-lowrank-being-much-slower-than-loop-implementation-both-cpu-and-gpu/119336
    svds = [torch.svd_lowrank(mi) for mi in M]
    u = torch.stack([svd[0] for svd in svds], dim=0)
    s = torch.stack([svd[1] for svd in svds], dim=0)
    v = torch.stack([svd[2] for svd in svds], dim=0)
    best_3d_coords = torch.bmm(u, torch.diag_embed(s).sqrt())[..., :3]

    # only eigen - way faster but no weights
    if weights is None and eigen == True:
        return torch.transpose(best_3d_coords, -1, -2), torch.zeros_like(torch.stack(his, dim=0))
    elif eigen == True:
        if verbose:
            print("Can't use eigen flag if weights are active. Fallback to iterative")

    # continue the iterative way
    if weights is None:
        weights = torch.ones_like(pre_dist_mat)

    # iterative updates:
    for i in range(iters):
        # compute distance matrix of coords and stress
        best_3d_coords = best_3d_coords.contiguous()
        dist_mat = torch.cdist(best_3d_coords, best_3d_coords, p=2).clone()
        stress = (weights * (dist_mat - pre_dist_mat)**2).sum(dim=(-1, -2)) * 0.5
        # perturb - update X using the Guttman transform - sklearn-like
        dist_mat[dist_mat <= 0] += 1e-7
        ratio = weights * (pre_dist_mat / dist_mat)
        B = -ratio
        B[:, diag_idxs, diag_idxs] += ratio.sum(dim=-1)
        # update
        coords = (1. / N * torch.matmul(B, best_3d_coords))
        dis = torch.norm(coords, dim=(-1, -2))

        if verbose >= 2:
            print('it: %d, stress %s' % (i, stress))
        # update metrics if relative improvement above tolerance
        if (his[-1] - stress / dis).mean() <= tol:
            if verbose:
                print('breaking at iteration %d with stress %s' % (i, stress / dis))
            break

        best_3d_coords = coords
        his.append(stress / dis)

    return torch.transpose(best_3d_coords, -1, -2), torch.stack(his, dim=0)
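# torch.svd_lowrank also accepts a batched (*, m, n) tensor directly; the
# per-matrix loop above is a deliberate workaround for the slowdown reported in
# the linked thread. A hedged sketch checking that both forms see the same
# spectra (randomized algorithm, so agreement is approximate, not bitwise).
import torch

torch.manual_seed(0)
M = torch.randn(4, 100, 100)
M = M @ M.transpose(-1, -2)                     # symmetric PSD batch

_, s_batched, _ = torch.svd_lowrank(M)          # one batched call
svds = [torch.svd_lowrank(mi) for mi in M]      # loop, as in mds_torch
s_loop = torch.stack([svd[1] for svd in svds], dim=0)

print(((s_batched - s_loop).abs() / s_loop).max())  # small relative difference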
def mds_torch(pre_dist_mat, weights=None, iters=10, tol=1e-5, eigen=False, verbose=2):
    """ Gets distance matrix. Outputs 3d. See below for wrapper.
        Assumes (for now) distogram is (N x N) and symmetric.
        Outs:
        * best_3d_coords: (batch x 3 x N)
        * historic_stresses: (batch x steps)
    """
    device, dtype = pre_dist_mat.device, pre_dist_mat.type()
    # ensure batched MDS (must happen before unpacking the shape below)
    pre_dist_mat = expand_dims_to(pre_dist_mat, length=(3 - len(pre_dist_mat.shape)))
    # start
    batch, N, _ = pre_dist_mat.shape
    diag_idxs = np.arange(N)
    his = [torch.tensor([np.inf] * batch, device=device)]

    # do it by eigendecomposition - way faster but no weights
    # https://www.biorxiv.org/content/10.1101/2020.11.27.401232v1.full.pdf
    if eigen == True and weights is None:
        preds_3d = []
        for bi in range(pre_dist_mat.shape[0]):
            D = pre_dist_mat[bi]**2
            M = D[:1, :] + D[:, :1] - D
            u, s, v = torch.svd_lowrank(M / 2)
            preds_3d.append((u @ torch.diag(s).sqrt())[:, :3].t())
        return torch.stack(preds_3d, dim=0), torch.zeros_like(torch.stack(his, dim=0))
    elif eigen == True:
        if verbose:
            print("Can't use eigen flag if weights are active. Fallback to iterative")

    # continue the iterative way
    if weights is None:
        weights = torch.ones_like(pre_dist_mat)

    # init random coords
    best_stress = float("Inf") * torch.ones(batch, device=device).type(dtype)
    best_3d_coords = 2 * torch.rand(batch, N, 3, device=device).type(dtype) - 1

    # iterative updates:
    for i in range(iters):
        # compute distance matrix of coords and stress
        dist_mat = torch.cdist(best_3d_coords, best_3d_coords, p=2).clone()
        stress = (weights * (dist_mat - pre_dist_mat)**2).sum(dim=(-1, -2)) * 0.5
        # perturb - update X using the Guttman transform - sklearn-like
        dist_mat[dist_mat <= 0] += 1e-7
        ratio = weights * (pre_dist_mat / dist_mat)
        B = -ratio
        B[:, diag_idxs, diag_idxs] += ratio.sum(dim=-1)
        # update
        coords = (1. / N * torch.matmul(B, best_3d_coords))
        dis = torch.norm(coords, dim=(-1, -2))

        if verbose >= 2:
            print('it: %d, stress %s' % (i, stress))
        # update metrics if relative improvement above tolerance
        if (best_stress - stress / dis).mean() <= tol:
            if verbose:
                print('breaking at iteration %d with stress %s' % (i, stress / dis))
            break

        best_3d_coords = coords
        best_stress = stress / dis  # track best stress for the tolerance test
        his.append(best_stress)

    return torch.transpose(best_3d_coords, -1, -2), torch.stack(his, dim=0)
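# Both mds_torch variants rest on the same classical-MDS identity: turn squared
# distances into an inner-product matrix anchored at point 0, factorize it, and
# keep three components. A self-contained check on an illustrative point cloud.
import torch

torch.manual_seed(0)
X = torch.randn(50, 3)                          # ground-truth coordinates
D = torch.cdist(X, X)**2                        # squared pairwise distances

M = (D[:1, :] + D[:, :1] - D) / 2               # Gram matrix relative to point 0
u, s, v = torch.svd_lowrank(M)
coords = (u @ torch.diag(s).sqrt())[:, :3]

dist_err = torch.cdist(coords, coords) - torch.cdist(X, X)
print(torch.norm(dist_err) / torch.norm(torch.cdist(X, X)))  # ~0: rigid motion apart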
import torch

# Another benefit of clone() is that it is recorded in the computation graph,
# so gradients flowing back to the copy also propagate to the source tensor.
x = torch.randn(1)
print(x)
print(x.item())

# Linear algebra
x = torch.rand(5, 3)
print(torch.trace(x))
print(torch.diag(x))
print(torch.triu(x))  # upper triangle
print(torch.mm(x, torch.rand(3, 1)))
print(torch.t(x))
print(torch.dot(x[0, :], torch.rand(3)))  # 1-D vectors
print(torch.inverse(x[:3, :]))
print(torch.svd_lowrank(x))

# Broadcasting
x = torch.arange(1, 3).view(1, 2)
print(x)
y = torch.arange(1, 4).view(3, 1)
print(y)
print(x + y)

# Addition allocates new memory
x = torch.rand(3, 1)
y = torch.rand(3, 1)
id_before = id(y)
y = x + y
print(id(y) == id_before)  # False: y now points at a new tensor
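# A hedged continuation of the id() check above: to reuse y's memory instead of
# allocating a new tensor, write into it via indexing, out=, or in-place ops.
import torch

x = torch.rand(3, 1)
y = torch.rand(3, 1)
id_before = id(y)

y[:] = x + y                 # writes into y's existing storage
torch.add(x, y, out=y)       # same effect via the out= argument
y += x                       # in-place addition also keeps the storage
print(id(y) == id_before)    # True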
def _initialize_H_W(self, eps=1e-6):
    n_samples, n_features = self.X.shape
    if self._init_method is None:
        self._init_method = 'nndsvdar' if self.k < min(n_samples, n_features) else 'random'

    if self._init_method in ['nndsvd', 'nndsvda', 'nndsvdar']:
        # NNDSVD: build nonnegative factors from a truncated SVD.
        U, S, V = torch.svd_lowrank(self.X, q=self.k)
        H = torch.zeros_like(U, dtype=self._tensor_dtype, device=self._device_type)
        W = torch.zeros_like(V.T, dtype=self._tensor_dtype, device=self._device_type)
        H[:, 0] = S[0].sqrt() * U[:, 0]
        W[0, :] = S[0].sqrt() * V[:, 0]

        for j in range(1, self.k):  # components 1..k-1; component 0 handled above
            x, y = U[:, j], V[:, j]
            # Split into positive parts and magnitudes of negative parts.
            x_p = x.maximum(torch.zeros_like(x, device=self._device_type))
            y_p = y.maximum(torch.zeros_like(y, device=self._device_type))
            x_n = x.minimum(torch.zeros_like(x, device=self._device_type)).abs()
            y_n = y.minimum(torch.zeros_like(y, device=self._device_type)).abs()
            x_p_nrm, y_p_nrm = x_p.norm(p=2), y_p.norm(p=2)
            x_n_nrm, y_n_nrm = x_n.norm(p=2), y_n.norm(p=2)
            m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm
            # Keep whichever signed pair carries more energy.
            if m_p > m_n:
                u, v, sigma = x_p / x_p_nrm, y_p / y_p_nrm, m_p
            else:
                u, v, sigma = x_n / x_n_nrm, y_n / y_n_nrm, m_n
            factor = (S[j] * sigma).sqrt()
            H[:, j] = factor * u
            W[j, :] = factor * v

        H[H < eps] = 0
        W[W < eps] = 0

        if self._init_method == 'nndsvda':
            # Fill zeros with the data mean.
            avg = self.X.mean()
            H[H == 0] = avg
            W[W == 0] = avg
        elif self._init_method == 'nndsvdar':
            # Fill zeros with small random values.
            avg = self.X.mean()
            H[H == 0] = avg / 100 * torch.rand(H[H == 0].shape, dtype=self._tensor_dtype, device=self._device_type)
            W[W == 0] = avg / 100 * torch.rand(W[W == 0].shape, dtype=self._tensor_dtype, device=self._device_type)
    elif self._init_method == 'random':
        avg = torch.sqrt(self.X.mean() / self.k)
        H = torch.abs(avg * torch.randn((self.X.shape[0], self.k), dtype=self._tensor_dtype, device=self._device_type))
        W = torch.abs(avg * torch.randn((self.k, self.X.shape[1]), dtype=self._tensor_dtype, device=self._device_type))
    else:
        raise ValueError(
            f"Invalid init parameter. Got {self._init_method}, but require one of (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random')."
        )

    self.H = H
    self.W = W
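# The loop body above is the NNDSVD split: break a singular-vector pair into
# its positive and negative parts and keep whichever pair carries more energy.
# A self-contained sketch of that single step on illustrative random data.
import torch

torch.manual_seed(0)
X = torch.rand(100, 40)                         # nonnegative data matrix
U, S, V = torch.svd_lowrank(X, q=5)

x, y = U[:, 1], V[:, 1]                         # a non-leading component pair
x_p, y_p = x.clamp(min=0), y.clamp(min=0)       # positive parts
x_n, y_n = (-x).clamp(min=0), (-y).clamp(min=0) # magnitudes of negative parts

m_p, m_n = x_p.norm() * y_p.norm(), x_n.norm() * y_n.norm()
if m_p > m_n:
    u, v, sigma = x_p / x_p.norm(), y_p / y_p.norm(), m_p
else:
    u, v, sigma = x_n / x_n.norm(), y_n / y_n.norm(), m_n

factor = (S[1] * sigma).sqrt()
h_col, w_row = factor * u, factor * v           # nonnegative H column / W row
print(bool((h_col >= 0).all()), bool((w_row >= 0).all()))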
                               center].contiguous()
    curr_img_crop = curr_img_stack[:, :,
                                   coord_to_crop[0] - center:coord_to_crop[0] + center,
                                   coord_to_crop[1] - center:coord_to_crop[1] + center].detach()

    # Reconstruction error
    Y = (curr_img_crop - dense_crop - sparse_crop)

    # Nuclear norm: flatten the spatial dims, then shrink the singular values
    dense_vector = dense_crop.view(dense_part.shape[0], dense_part.shape[1], -1)
    with autocast(enabled=False):
        (u, s, v) = torch.svd_lowrank(dense_vector.permute(0, 2, 1).float(),
                                      q=args.rank)
        sOriginal = torch.autograd.Variable(s.clone())
        # singular value soft-thresholding (proximal step for the nuclear norm)
        s = torch.sign(s) * torch.max(s.abs() - net.mu_sum_constraint,
                                      torch.zeros_like(s))
        mean_eigen_values += sOriginal.mean(dim=0).detach().cpu()
        mean_eigen_values_cropped += s.mean(dim=0).detach().cpu()
        # Reconstruct the images from the thresholded factors
        for nB in range(s.shape[0]):
            currS = torch.diag(s[nB, :])
            dense_vector[nB, ...] = torch.mm(torch.mm(u[nB, ...], currS),
                                             v[nB, ...].t()).t()
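# The thresholding above is the proximal operator of the nuclear norm: shrink
# every singular value by a constant and rebuild the matrix, which lowers its
# effective rank. A minimal sketch with an illustrative threshold mu.
import torch

torch.manual_seed(0)
A = torch.randn(64, 32)
mu = 1.5                                        # assumed shrinkage threshold

u, s, v = torch.svd_lowrank(A, q=16)
s_shrunk = torch.clamp(s - mu, min=0)           # soft-threshold singular values
A_denoised = u @ torch.diag(s_shrunk) @ v.t()   # low-rank reconstruction

print(int((s > 0).sum()), int((s_shrunk > 0).sum()))  # effective rank drops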