def getEps(e1, e2, params, mode='P'):
    '''
    Combine e1 and e2 into the pair (ea, eb) using the mixing value P:

        ea = e1 * (3 - 2 * P) + 2 * e2 * P
        eb = e1 * P + e2 * (3 - P)

    mode : 'P' => params is P itself
           'R' => params is the pair (r1, r2) and P = getP(r1, r2)

    When P is an Iterable (several mixing values at once) the products
    above become outer products, computed with torch if e1 is a tensor and
    with numpy otherwise.  A 0-dim tensor P next to a tensor e1 is handled
    like a plain scalar.

    Returns the tuple (ea, eb).
    '''
    if mode == 'R':
        r1, r2 = params
        P = getP(r1, r2)
    else:
        P = params

    # Single mixing value: ordinary elementwise arithmetic.
    if not isinstance(P, Iterable):
        ea = e1 * (3 - 2 * P) + 2 * e2 * P
        eb = e1 * P + e2 * (3 - P)
        return ea, eb

    # Several mixing values, non-tensor inputs: numpy outer products.
    if not torch.is_tensor(e1):
        ea = np.outer(e1, (3 - 2 * P)) + 2 * np.outer(e2, P)
        eb = np.outer(e1, P) + np.outer(e2, 3 - P)
        return ea, eb

    # Tensors are Iterable even when 0-dim, so that case is scalar-like.
    if P.ndim == 0:
        ea = e1 * (3 - 2 * P) + 2 * e2 * P
        eb = e1 * P + e2 * (3 - P)
        return ea, eb

    ea = torch.outer(e1, (3 - 2 * P)) + 2 * torch.outer(e2, P)
    eb = torch.outer(e1, P) + torch.outer(e2, 3 - P)
    return ea, eb
def qt_cal(x, y):
    """Eigendecompose the outer products of ``x`` with itself and ``y`` with itself.

    Args:
        x: 1-D floating-point tensor.
        y: 1-D floating-point tensor.

    Returns:
        None. As in the original block, the eigenvalues and eigenvectors are
        computed but not returned.
    """
    input_outer_product = torch.outer(x, x)
    output_outer_product = torch.outer(y, y)
    # torch.eig(..., eigenvectors=True) is no longer available in current
    # PyTorch releases; torch.linalg.eig is its documented replacement and
    # always returns (eigenvalues, eigenvectors), both complex-valued.
    (input_evals, input_evecs) = torch.linalg.eig(input_outer_product)
    (output_evals, output_evecs) = torch.linalg.eig(output_outer_product)
def qt_cal_density_matrix(x):
    """Rebuild ``outer(x, x)`` from its eigendecomposition as a float32 matrix.

    The result is ``sum_i eigval_i * outer(evec_i, evec_i)`` over the
    eigenpairs of ``outer(x, x)``.

    Args:
        x: 1-D real tensor. Integer tensors are converted to float first,
            because the eigen-solvers are not implemented for integer dtypes:
            https://stackoverflow.com/questions/66894586/eig-cpu-not-implemented-for-long-in-torch-eigx

    Returns:
        ``(len(x), len(x))`` tensor of dtype ``torch.float``.
    """
    outer_product = torch.outer(x, x)
    if not torch.is_floating_point(outer_product):
        outer_product = outer_product.to(torch.float)

    # outer(x, x) is symmetric for real x, so eigh applies and yields real
    # eigenvalues directly (the removed torch.eig returned (real, imag)
    # pairs of which only the real part was used).
    evals, evecs = torch.linalg.eigh(outer_product)

    # Scaling column i of evecs by evals[i] and multiplying by evecs^T is the
    # same sum of rank-one terms, computed in one matmul.
    density_matrix = (evecs * evals) @ evecs.t()
    return density_matrix.to(torch.float)
def learn(self) -> None: """Updates weights with XCAL learning equation.""" # Compute weight changes srs = torch.outer(self.post.avg_s, self.pre.avg_s) srm = torch.outer(self.post.avg_m, self.pre.avg_m) s_mix = 0.9 sm_mix = s_mix * srs + (1 - s_mix) * srm # Compute cos diff avg cos_diff_avg = self.post.cos_diff_avg if not self.spec.cos_diff_thr_l_mix: cos_diff_avg = 1 if not self.post.hidden: cos_diff_avg = 0 # Clamped layers should not use Hebbian learning # Compute the learning rate modifier, if enabled lrate_mod = 1.0 if self.spec.cos_diff_lrate: diff = self.post.cos_diff diff_avg = self.post.cos_diff_avg lo_diff = 0.0 lo_lrate = 0.01 hi_diff = 1.0 hi_lrate = 0.01 if diff <= lo_diff: lrate_mod = lo_lrate elif diff >= hi_diff: lrate_mod = hi_lrate elif diff < diff_avg: lrate_mod = 1.0 - ((diff_avg - diff) / (diff_avg - lo_diff)) lrate_mod = lo_lrate + (1.0 - lo_lrate) * lrate_mod else: lrate_mod = 1.0 - ((diff - diff_avg) / (hi_diff - diff_avg)) lrate_mod = hi_lrate + (1.0 - hi_lrate) * lrate_mod lthr = torch.outer(self.post.avg_l, self.pre.avg_m * self.spec.thr_l_mix * cos_diff_avg) mthr = (1 - self.spec.thr_l_mix * cos_diff_avg) * srm dwts = self.spec.lrate * xcal(sm_mix, lthr + mthr) # comment otherwise does not learn # dwts[~self.mask] = 0 # Apply weights mask = dwts > 0 dwts[mask] *= 1 - self.fwts[mask] dwts[~mask] *= self.fwts[~mask] self.fwts += dwts self.wts = sig(self.spec.sig_gain, self.spec.sig_offset, self.fwts)
def bfgs_method(f, fprime, params, x0, maxiter=None, epsi=10e-3):
    """Minimise a function with BFGS using a fixed unit step length.

    Args:
        f: Objective function. It is accepted for interface compatibility but
            not evaluated, because no line search is performed.
        fprime: Gradient function, called as ``fprime(x, *params)`` with ``x``
            shaped like ``x0``.
        params: Extra positional arguments forwarded to ``fprime``.
        x0: Initial point (tensor of any shape).
        maxiter: Maximum number of iterations (defaults to 1000).
        epsi: Iteration stops once the gradient norm is not larger than this.

    Returns:
        Tuple ``(x, k)``: the final point reshaped like ``x0`` and the number
        of iterations performed.
    """
    if maxiter is None:
        maxiter = 1000

    field_shape = x0.shape
    gfk = torch.flatten(fprime(x0, *params))
    xk = torch.flatten(x0)

    # Build the identity with the gradient's dtype/device so Hk @ gfk is
    # valid for any input precision and placement.
    I = torch.eye(int(xk.shape[0]), dtype=gfk.dtype, device=gfk.device)
    Hk = I  # current approximation of the inverse Hessian

    k = 0
    while torch.linalg.norm(gfk).item() > epsi and k < maxiter:
        # pk - direction of search
        pk = -torch.matmul(Hk, gfk)

        # No line search: always take the full quasi-Newton step.
        alpha_k = 1
        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1

        gfkp1 = torch.flatten(fprime(xkp1.view(field_shape), *params))
        yk = gfkp1 - gfk
        gfk = gfkp1
        k += 1

        # BFGS inverse-Hessian update:
        #   H+ = (I - ro s y^T) H (I - ro y s^T) + ro s s^T
        # The right-hand factor must use outer(yk, sk); with outer(yk, yk)
        # the updated matrix no longer satisfies the secant equation H+ y = s.
        ro = 1.0 / (torch.matmul(yk, sk))
        A1 = I - ro * torch.outer(sk, yk)
        A2 = I - ro * torch.outer(yk, sk)
        Hk = torch.matmul(A1, torch.matmul(Hk, A2)) + (ro * torch.outer(sk, sk))

    return (xk.view(field_shape), k)
def get_shape_operator_poole(pt_grad, pt_hess):
    """
    Build a shape operator from a gradient and a Hessian at one point.

    Both inputs are squeezed and divided by the gradient norm; the scaled
    Hessian then has its component along the unit gradient removed, first
    through the row-side outer product and then through the column-side one.

    Adapted from descriptions given in:
        B Poole, S Lahiri, M Raghu, J Sohl-Dickstein, S Ganguli (2016) -
        Exponential Expressivity in Deep Neural Networks Through Transient Chaos
        code: https://github.com/ganguli-lab/deepchaos
    """
    grad = pt_grad.squeeze()
    hess = pt_hess.squeeze()

    scale = torch.linalg.norm(grad)
    unit_grad = grad / scale
    scaled_hess = hess / scale

    # Row-side projection: subtract n (n^T H).
    row_component = torch.outer(unit_grad, unit_grad @ scaled_hess)
    projected_hess = scaled_hess - row_component

    # Column-side projection: subtract (P n) n^T.
    col_component = torch.outer(projected_hess @ unit_grad, unit_grad)
    return projected_hess - col_component
def __init__(self, alpha, cns, dns):
    """Store ``self.mat = alpha * I + sum_i outer(cns[i], dns[i])``.

    The identity takes its size, dtype and device from ``cns[0]``.
    ``cns`` and ``dns`` are indexable collections of 1-D tensors; one outer
    product is accumulated per entry of ``cns``.
    """
    reference = cns[0]
    self.mat = torch.eye(
        torch.numel(reference), dtype=reference.dtype, device=reference.device)
    self.mat.mul_(alpha)
    for idx, c_vec in enumerate(cns):
        self.mat.add_(torch.outer(c_vec, dns[idx]))
def preconditioned_grad(self, damping: float = 0.001) -> None:
    """Compute the preconditioned gradient for this module's weights.

    The result is stored in ``self.grad``; applying it to the actual
    gradients is a separate step (``update_gradient()``), so that
    intermediate processing can happen in between.

    Args:
        damping (float, optional): damping added to the eigenvalue grid when
            preconditioning with the eigendecomposition method, i.e. when
            ``self.prediv_eigenvalues`` is false (default: 0.001).

    Raises:
        RuntimeError: if the eigenvectors (``qa``, ``qg``) or the eigenvalue
            data required by the active mode (``dgda``, or ``da`` and ``dg``)
            are still ``None``.
    """
    not_ready = self.qa is None or self.qg is None
    if not not_ready:
        if self.prediv_eigenvalues:
            not_ready = self.dgda is None
        else:
            not_ready = self.da is None or self.dg is None
    if not_ready:
        raise RuntimeError(
            'Eigendecompositions for both A and G have not been computed',
        )

    raw_grad = self.module.get_grad()
    original_dtype = raw_grad.dtype
    raw_grad = raw_grad.to(self.qa.dtype)

    # Rotate the gradient into the eigenbasis.
    rotated = self.qg.t() @ raw_grad @ self.qa

    if self.prediv_eigenvalues:
        scaled = rotated * self.dgda
    else:
        eigen_grid = torch.outer(
            cast(torch.Tensor, self.dg),
            cast(torch.Tensor, self.da),
        )
        scaled = rotated / (eigen_grid + damping)

    # Rotate back and restore the gradient's original dtype.
    self.grad = (self.qg @ scaled @ self.qa.t()).to(original_dtype)
def _loss_fn(self, pred, data):
    """Weighted sum of squared errors between predicted and target distances.

    Pair (i, j) is weighted by ``sqrt(norm_i) * sqrt(norm_j)``, where the
    norms are ``data.node_norm`` restricted by ``data.cell_mask``.
    ``pred["dists"]`` is compared against ``data.dists``.
    """
    node_weights = data.node_norm[data.cell_mask].sqrt()
    pair_weights = torch.outer(node_weights, node_weights)
    squared_error = (pred["dists"] - data.dists) ** 2
    return torch.sum(pair_weights * squared_error)
def add_fixed_points(self, n_patterns):
    """Add a Hebbian-style sum of pattern outer products to ``self.J.weight``.

    The patterns are the first ``n_patterns`` rows of ``2 * I - 1`` (size
    ``self.args.N``): each has a single ``+1`` entry and ``-1`` elsewhere.
    The summed outer products are scaled by
    ``self.args.fixed_beta / self.args.N / n_patterns`` and added in place to
    the weight data.

    Args:
        n_patterns: Number of patterns to embed. ``0`` leaves the weights
            untouched (the normalisation would otherwise be 0 / 0 and turn
            every weight into NaN). If it exceeds ``self.args.N`` only
            ``self.args.N`` patterns exist, while the normalisation still
            divides by ``n_patterns``.
    """
    if n_patterns == 0:
        return

    N = self.args.N
    # Build the patterns on the weight's device so the in-place add is valid
    # wherever the model lives.
    eye = torch.eye(N, device=self.J.weight.device)
    patterns = (2 * eye - 1)[:n_patterns, :]

    # sum_p outer(p, p) == P^T P for the pattern matrix P (rows = patterns).
    W_patt = patterns.t() @ patterns
    self.J.weight.data += self.args.fixed_beta * W_patt / N / n_patterns
def __init__(
    self,
    filter_type=None,
    stride=1,
    padding=1,
    factor=1,
    direction="vh",
    ring=True,
):
    """Set up the padding layer and the normalised filter kernel.

    Args:
        filter_type: 1-D filter taps. ``None`` (the default) selects
            ``[1, 3, 3, 1]``; a fresh list is created per instance so the
            default is never shared between instances.
        stride: Stored as ``self.stride``.
        padding: Expanded with ``_quadruple`` and handed to ``Pad``.
        factor: When greater than 1 the kernel is multiplied by
            ``factor ** 2`` after normalisation.
        direction: ``"vh"`` uses the outer product of the taps with
            themselves, ``"v"`` a column kernel, ``"h"`` a row kernel.
        ring: Selects ``"circular"`` horizontal padding when true,
            ``"reflect"`` otherwise; vertical padding is always ``"reflect"``.

    Raises:
        ValueError: if ``direction`` is not one of ``"vh"``, ``"v"``, ``"h"``.
    """
    super().__init__()
    if filter_type is None:
        filter_type = [1, 3, 3, 1]
    self.filter_type = filter_type
    self.stride = stride
    self.padding = _quadruple(padding)
    self.factor = factor
    self.direction = direction
    self.pad = Pad(
        padding=self.padding,
        horizontal="circular" if ring else "reflect",
        vertical="reflect",
    )

    kernel = torch.tensor(self.filter_type, dtype=torch.float32)
    if direction == "vh":
        kernel = torch.outer(kernel, kernel)
    elif direction == "v":
        kernel = kernel[:, None]
    elif direction == "h":
        kernel = kernel[None, :]
    else:
        raise ValueError(
            f"direction must be 'vh', 'v' or 'h', got {direction!r}")

    # Normalise so the taps sum to one.
    kernel /= kernel.sum()
    if factor > 1:
        kernel *= factor ** 2
    # Stored with two leading singleton dimensions.
    self.register_buffer("kernel", kernel[None, None])
def _est_additive_noise(
    subdata: torch.Tensor, calculation_dtype: torch.dtype = torch.float
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Estimate the additive noise in ``subdata`` by per-row regression.

    Each row is regressed on all the other rows; the residual of that
    regression is the noise estimate for the row.

    Args:
        subdata: 2-D tensor; each row is treated as one variable and each
            column as one sample.
        calculation_dtype: dtype used for the internal computation.

    Returns:
        ``(w, ret)``: ``w`` has the shape of ``subdata`` and holds the
        residuals; ``ret`` is the diagonal matrix of the per-row mean of
        ``w * w``. Both are cast back to the input dtype.
    """
    eps = 1e-6
    dim0data, dim1data = subdata.shape
    dtp = subdata.dtype
    subdata = subdata.to(dtype=calculation_dtype)
    w = torch.zeros(subdata.shape, dtype=calculation_dtype, device=subdata.device)

    ddp = subdata @ torch.conj(subdata).T
    # RRi = inv(RR + small*eye(L)): the regulariser belongs on the diagonal
    # only. Adding eps to every entry (and multiplying by the identity, a
    # no-op) is a rank-one shift that does not guarantee invertibility.
    ridge = eps * torch.eye(int(dim0data), dtype=calculation_dtype, device=subdata.device)
    ddpi = torch.inverse(ddp + ridge)

    for i in range(dim0data):
        # XX = RRi - (RRi(:,i)*RRi(i,:))/RRi(i,i);
        xx = ddpi - (torch.outer(ddpi[:, i], ddpi[i, :]) / ddpi[i, i])
        # RRa = RR(:,i); cloned so that zeroing entry i does not write
        # through the view into ddp itself.
        ddpa = ddp[:, i].clone()
        ddpa[i] = 0.0  # RRa(i)=0; % this remove the effects of XX(:,i)
        beta = xx @ ddpa  # beta = XX * RRa;
        beta[i] = 0  # beta(i)=0; % this remove the effects of XX(i,:)
        w[i, :] = subdata[i, :] - (beta @ subdata)

    # Rw=diag(diag(w*w'/N)); only the diagonal is needed, so compute the
    # per-row sums directly instead of the full w @ w.T product.
    ret = torch.diag((w * w).sum(dim=1) / float(dim1data))
    w = w.to(dtype=dtp)
    ret = ret.to(dtype=dtp)
    return w, ret
def _generator(self, data_size):
    """Draw ``data_size`` noisy samples of a rank-one signal.

    A sign vector ``u`` with entries in {-1, +1} is sampled from
    ``Bernoulli(self.p)`` (zeros are mapped to -1) and stored on
    ``self.u``. The returned samples are
    ``outer(u, self.v) / self.d`` plus zero-mean Gaussian noise with
    standard deviation ``self.std``.

    Args:
        data_size: Number of samples to draw.

    Returns:
        Tensor of shape ``(data_size, self.data_dim)``.
    """
    dist_bern = torch.distributions.Bernoulli(self.p)
    # reshape(-1) instead of squeeze(): squeeze() would collapse a single
    # sample to a 0-dim tensor, which torch.outer rejects, so
    # data_size == 1 used to raise.
    self.u = dist_bern.sample((data_size, )).reshape(-1)
    self.u[self.u == 0] = -1
    y = torch.outer(self.u, self.v) / self.d
    y += torch.normal(0, self.std, (data_size, self.data_dim))
    return y
def compute(self):
    """Return ``(mean, cov)`` from the accumulated raw sums.

    ``self.raw_mean`` and ``self.raw_cov`` are divided by the item count
    reported by ``self.num_items_metric.compute()``; the covariance is then
    centred by subtracting ``outer(mean, mean)``. Requires
    ``self.capture_mean_cov`` to be truthy.
    """
    assert self.capture_mean_cov
    count = self.num_items_metric.compute()
    mean = self.raw_mean / count
    second_moment = self.raw_cov / count
    return mean, second_moment - torch.outer(mean, mean)
def train_one_batch(self, batch: Tensor):
    """Accumulate first- and second-moment statistics for one batch.

    The batch is embedded with ``self._embed`` (one row per sample; the
    original comment gives the shape as n * 512). Then:

    * ``self.cov_sum`` grows by the sum of ``outer(e, e)`` over all rows,
    * ``self.mean`` grows by the sum of the rows,
    * ``self.N`` grows by the number of rows.

    Args:
        batch: Input batch handed to ``self._embed``.
    """
    embeddings = self._embed(batch)  # n * 512
    b = embeddings.size(0)
    # sum_i outer(e_i, e_i) == E^T E, so one matmul replaces the per-row
    # Python loop.
    self.cov_sum += embeddings.t() @ embeddings
    self.mean += embeddings.sum(dim=0)
    self.N += b
def cross_one_hot(feature_a: torch.Tensor, feature_b: torch.Tensor) -> torch.Tensor:
    """Computes the feature cross of two one-hot encoded features.

    Row ``i`` of the result is the flattened outer product of
    ``feature_a[i]`` and ``feature_b[i]``.

    Args:
        feature_a: 2-D tensor of shape ``(n, A)``.
        feature_b: 2-D tensor with at least ``n`` rows, shape ``(>= n, B)``;
            rows beyond the first ``n`` are ignored.

    Returns:
        Tensor of shape ``(n, A * B)``. An empty batch (``n == 0``) yields an
        empty ``(0, A * B)`` tensor instead of failing.

    Raises:
        RuntimeError: if either input is not 2-D.
        IndexError: if ``feature_b`` has fewer rows than ``feature_a``.
    """
    if feature_a.dim() != 2 or feature_b.dim() != 2:
        raise RuntimeError(
            "cross_one_hot expects 2-D inputs, got "
            f"{feature_a.dim()}-D and {feature_b.dim()}-D")
    rows = feature_a.shape[0]
    if feature_b.shape[0] < rows:
        raise IndexError(
            f"feature_b has {feature_b.shape[0]} rows but feature_a has {rows}")

    # (n, A, 1) * (n, 1, B) -> (n, A, B): a batched outer product with the
    # same type promotion as torch.outer, without a Python-level loop.
    crossed = feature_a.unsqueeze(2) * feature_b[:rows].unsqueeze(1)
    return crossed.flatten(start_dim=1)
def _online_update(features: torch.Tensor, total: torch.Tensor, sigma: torch.Tensor) -> None:
    """Accumulate one feature vector into running sums, in place.

    Args:
        features: 1-D feature vector.
        total: Running sum of feature vectors; ``features`` is added to it.
        sigma: Running sum of outer products; ``outer(features, features)``
            is added to it.
    """
    total += features
    # Detect the function instead of comparing version strings: this picks
    # torch.outer exactly when it exists and falls back to the older
    # torch.ger otherwise, without parsing torch.__version__.
    outer = getattr(torch, "outer", None)
    if outer is None:
        outer = torch.ger
    sigma += outer(features, features)
def _generate_centers(self, v1, v2):
    """Rotate the normalised ``v1`` within the plane spanned by ``v1`` and ``v2``.

    ``v1`` is normalised to ``n1``; ``v2`` is normalised, has its component
    along ``n1`` removed and is normalised again to give ``n2``. One rotation
    matrix per angle in ``self.thetas`` is then built from ``self.eye_matrix``
    and outer products of ``n1`` and ``n2``, and applied to ``n1``.

    Returns a tensor with one rotated vector per angle.
    """
    first = normalize(v1, dim=0, p=2)
    second = normalize(v2, dim=0, p=2)
    # Make the second direction orthogonal to the first.
    second = normalize(second - torch.dot(first, second) * first, dim=0, p=2)

    antisymmetric = torch.outer(second, first) - torch.outer(first, second)
    in_plane = torch.outer(first, first) + torch.outer(second, second)

    # Add two trailing singleton dims so each angle scales a whole matrix.
    sin_term = torch.sin(self.thetas).unsqueeze(dim=-1).unsqueeze(dim=-1)
    cos_term = (torch.cos(self.thetas) - 1).unsqueeze(dim=-1).unsqueeze(dim=-1)

    rotations = self.eye_matrix + antisymmetric * sin_term + in_plane * cos_term
    return torch.einsum('bij,j->bi', rotations, first)
def hopfield_reservoir(N, g, patterns, beta):
    """Build an ``N x N`` weight matrix: random part plus stored patterns.

    The random part is drawn from a zero-mean normal distribution with
    standard deviation ``g / sqrt(N)``. Each entry ``p`` of ``patterns``
    contributes ``beta * outer(p, p) / N``.
    """
    W = torch.zeros((N, N))
    W += torch.normal(torch.zeros_like(W), g / np.sqrt(N))
    for pattern in patterns:
        vec = torch.as_tensor(pattern)
        W += beta * (torch.outer(vec, vec) / N)
    return W
def get_covariance_matrix(X):
    '''
    Returns the covariance of the data X, computed as the second-moment
    matrix ``X^T X / n`` minus the outer product of the column means
    (normalised by the number of rows ``n``).

    X should contain a single data point per row of the tensor
    '''
    column_means = torch.mean(X, dim=0)
    second_moment = torch.matmul(X.transpose(0, 1), X) / X.size(0)
    return second_moment - torch.outer(column_means, column_means)
def ifft2d(self, gridy, coeff1, coeff2, k1, k2):
    # Evaluate a truncated 2-D Fourier series at arbitrary points.
    # gridy: (batch, N, 2) locations in [0,1]*[0,1]
    # coeff1 / coeff2: (batch, channels, kmax, kmax)
    # returns Y: (batch, channels, N), the real part of the series
    batchsize, N = gridy.shape[0], gridy.shape[1]
    device = gridy.device
    m1 = 2 * k1
    m2 = 2 * k2 - 1

    # wavenumber grids, each (m1, m2)
    freqs_1 = torch.cat((torch.arange(start=0, end=k1, step=1),
                         torch.arange(start=-(k1), end=0, step=1)), 0)
    k_x1 = freqs_1.reshape(m1, 1).repeat(1, m2).to(device)
    freqs_2 = torch.cat((torch.arange(start=0, end=k2, step=1),
                         torch.arange(start=-(k2 - 1), end=0, step=1)), 0)
    k_x2 = freqs_2.reshape(1, m2).repeat(m1, 1).to(device)

    # K = <y, k_x>, (batch, N, m1, m2)
    phase_1 = torch.outer(gridy[:, :, 0].view(-1), k_x1.view(-1))
    phase_2 = torch.outer(gridy[:, :, 1].view(-1), k_x2.view(-1))
    K = (phase_1.reshape(batchsize, N, m1, m2)
         + phase_2.reshape(batchsize, N, m1, m2))

    # complex exponential basis, same shape as K
    basis = torch.exp(1j * 2 * np.pi * K).to(device)

    # Assemble the full (batch, channels, m1, m2) spectrum: the given
    # coefficients on the left, flipped conjugates on the right.
    coeff3 = coeff1[:, :, 1:, 1:].flip(-1, -2).conj()
    coeff4 = torch.cat(
        [coeff1[:, :, 0:1, 1:].flip(-1).conj(),
         coeff2[:, :, :, 1:].flip(-1, -2).conj()],
        dim=-2)
    left_half = torch.cat([coeff1, coeff2], dim=-2)
    right_half = torch.cat([coeff4, coeff3], dim=-2)
    coeff = torch.cat([left_half, right_half], dim=-1)

    # Y (batch, channels, N)
    return torch.einsum("bcxy,bnxy->bcn", coeff, basis).real
def _fspecial_gauss_2d(self, size, sigma):
    """Create a 2-D gauss kernel as the outer product of the 1-D kernel.

    The 1-D kernel comes from ``self._fspecial_gauss_1d(size, sigma)``.

    Args:
        size (int): the size of gauss kernel
        sigma (float): sigma of normal distribution

    Returns:
        torch.Tensor: 2D kernel (size x size)
    """
    kernel_1d = self._fspecial_gauss_1d(size, sigma)
    kernel_2d = torch.outer(kernel_1d, kernel_1d)
    return kernel_2d
def get_params(self, epsilon: float = .01):
    """Return ``(means, covs)`` estimated from the accumulated sums.

    Works on detached copies, so ``self.mean`` and ``self.cov_sum`` are left
    untouched. The mean is the accumulated sum divided by ``self.N``; the
    covariance is ``(cov_sum - N * outer(mean, mean)) / (N - 1)`` plus
    ``epsilon`` times the identity.
    """
    means = self.mean.detach().clone()
    covs = self.cov_sum.detach().clone()
    ridge = epsilon * torch.eye(self.feature_size).to(self.device)

    means.div_(self.N)
    covs.sub_(self.N * torch.outer(means, means))
    covs.div_(self.N - 1)
    covs.add_(ridge)
    return means, covs
def blas_lapack_ops(self):
    # Exercises a list of torch BLAS/LAPACK-style functions on small random
    # tensors and returns all results as one tuple, in the order listed.
    # Entries that are commented out are kept exactly as in the original.
    # NOTE(review): several of the active calls (e.g. torch.eig, torch.lstsq,
    # torch.solve, torch.chain_matmul) belong to the legacy API -- confirm
    # they exist in the PyTorch version this is run against.
    m = torch.randn(3, 3)      # square matrix used by most calls
    a = torch.randn(10, 3, 4)  # batch of 10 matrices
    b = torch.randn(10, 4, 3)  # batch of 10 matrices, shape-compatible with a
    v = torch.randn(3)         # vector
    return (
        torch.addbmm(m, a, b),
        torch.addmm(torch.randn(2, 3), torch.randn(2, 3), torch.randn(3, 3)),
        torch.addmv(torch.randn(2), torch.randn(2, 3), torch.randn(3)),
        torch.addr(torch.zeros(3, 3), v, v),
        torch.baddbmm(m, a, b),
        torch.bmm(a, b),
        torch.chain_matmul(torch.randn(3, 3), torch.randn(3, 3), torch.randn(3, 3)),
        # torch.cholesky(a), # deprecated
        torch.cholesky_inverse(torch.randn(3, 3)),
        torch.cholesky_solve(torch.randn(3, 3), torch.randn(3, 3)),
        torch.dot(v, v),
        torch.eig(m),
        torch.geqrf(a),
        torch.ger(v, v),
        torch.inner(m, m),
        torch.inverse(m),
        torch.det(m),
        torch.logdet(m),
        torch.slogdet(m),
        torch.lstsq(m, m),
        torch.lu(m),
        torch.lu_solve(m, *torch.lu(m)),
        torch.lu_unpack(*torch.lu(m)),
        torch.matmul(m, m),
        torch.matrix_power(m, 2),
        # torch.matrix_rank(m),
        torch.matrix_exp(m),
        torch.mm(m, m),
        torch.mv(m, v),
        # torch.orgqr(a, m),
        # torch.ormqr(a, m, v),
        torch.outer(v, v),
        torch.pinverse(m),
        # torch.qr(a),
        torch.solve(m, m),
        torch.svd(a),
        # torch.svd_lowrank(a),
        # torch.pca_lowrank(a),
        # torch.symeig(a), # deprecated
        # torch.lobpcg(a, b), # not supported
        torch.trapz(m, m),
        torch.trapezoid(m, m),
        torch.cumulative_trapezoid(m, m),
        # torch.triangular_solve(m, m),
        torch.vdot(v, v),
    )
def _expand_signals(discounted_rewards, temporal_novelty, spatial_novelty, weight_bias_vector):
    """Expand all four signals to a common ``(time, hidden)`` grid.

    ``time`` is ``len(discounted_rewards)`` and ``hidden`` is
    ``len(weight_bias_vector)``. The per-time vectors are repeated along the
    hidden axis, ``temporal_novelty`` is broadcast against a grid of ones,
    and the weight/bias vector is repeated along the time axis.

    Returns the tuple ``(discounted_rewards, temporal_novelty,
    spatial_novelty, weight_bias_vector)`` in expanded form.
    """
    n_hidden = len(weight_bias_vector)
    n_steps = len(discounted_rewards)

    # per-time vectors -> one identical column per hidden unit
    rewards_grid = torch.outer(discounted_rewards, torch.ones(n_hidden))
    spatial_grid = torch.outer(spatial_novelty, torch.ones(n_hidden))

    # temporal novelty -> broadcast over both axes
    temporal_grid = temporal_novelty * torch.ones([n_steps, n_hidden])

    # weight/bias vector -> one identical row per time step
    weights_grid = torch.outer(torch.ones(n_steps), weight_bias_vector)

    return rewards_grid, temporal_grid, spatial_grid, weights_grid
def test_outer_ger_addr_legacy_tests(self, device):
    # Empty operands: outer, ger and addr must all produce the (possibly
    # zero-sized) result shape.
    for rows, cols in ((0, 0), (0, 5), (5, 0)):
        size = (rows, cols)
        a = torch.rand(rows, device=device)
        b = torch.rand(cols, device=device)

        self.assertEqual(torch.outer(a, b).shape, size)
        self.assertEqual(torch.ger(a, b).shape, size)

        m = torch.empty(size, device=device)
        self.assertEqual(torch.addr(m, a, b).shape, size)

    # A 0-dim operand must be rejected in either argument position.
    m = torch.randn(5, 6, device=device)
    a = torch.randn(5, device=device)
    b = torch.tensor(6, device=device)
    for op in (torch.outer, torch.ger):
        self.assertRaises(RuntimeError, lambda: op(a, b))
        self.assertRaises(RuntimeError, lambda: op(b, a))
    self.assertRaises(RuntimeError, lambda: torch.addr(m, a, b))
    self.assertRaises(RuntimeError, lambda: torch.addr(m, b, a))
def fix(self, center=True):
    """Returns the Covariance matrix

    The accumulated ``self._cov_mtx`` and ``self._avg`` are divided by
    ``self._tlen``. With ``center`` true, the outer product of the average
    with itself is subtracted from the result.
    """
    n_samples = self._tlen
    mean = self._avg / n_samples
    # Division yields a new tensor, so the in-place subtraction below never
    # touches self._cov_mtx.
    result = self._cov_mtx / n_samples
    if center:
        result -= torch.outer(mean, mean)
    return result
def forward(self, x, frequencies, x_grid):
    # Evaluate a discrete Fourier sum of x at the given frequencies.
    # Input has size (n_batch, n_channels, n_x_points); the unpacking also
    # rejects inputs that are not 3-D.
    n_batch, n_channels, n_x_points = x.shape
    signal = x.type(torch.cfloat)

    # exp(-2*pi*i * f * s) for every (frequency, grid point) pair: shape f,s
    phase = torch.mul(x_grid, -1j * 2 * np.pi)
    kernel = torch.outer(frequencies, phase).exp()

    # signal has shape b,c,s and kernel has shape f,s; contracting over s
    # gives the output of shape b,c,f
    return torch.einsum('bcs,fs->bcf', signal, kernel)
def get_grid_corr_2d(self):
    """Return the 2-D grid correction as an outer product of two 1-D ones.

    Coordinates are built from ``self.nx`` / ``self.ny`` points with spacing
    ``self.dx`` / ``self.dy``, moved to ``self.device`` and passed through
    ``_inv_gcf_kaiser`` together with ``self.du`` / ``self.dv``,
    ``self.config['W']`` and ``self.beta``.
    """
    target = self.device
    x = (torch.arange(self.nx) * self.dx + self.xmin).to(target)
    # NOTE(review): the y axis is also offset by self.xmin -- confirm this is
    # intended.
    y = (torch.arange(self.ny) * self.dy + self.xmin).to(target)

    W = self.config['W']
    corr_x = _inv_gcf_kaiser(x, self.du, W, self.beta)
    corr_y = _inv_gcf_kaiser(y, self.dv, W, self.beta)
    return torch.outer(corr_x, corr_y)
def rotation_matrix(theta: torch.Tensor, n_1: torch.Tensor, n_2: torch.Tensor) -> torch.Tensor:
    """
    Build the matrix that rotates vectors by the angle @theta inside the
    2 dimensional plane spanned by @n1 and @n2. The vectors @n1 and @n2 have
    to be orthogonal; this is asserted with a tolerance of 1e-4 on their dot
    product.
    Inspired by
    https://analyticphysics.com/Higher%20Dimensions/Rotations%20in%20Higher%20Dimensions.htm
    :param @n1: first vector spanning 2-d rotation plane, needs to be orthogonal to @n2
    :param @n2: second vector spanning 2-d rotation plane, needs to be orthogonal to @n1
    :param @theta: rotation angle
    :returns : rotation matrix
    """
    dim = len(n_1)
    assert len(n_1) == len(n_2)
    assert (n_1.dot(n_2).abs() < 1e-4)

    # Antisymmetric part carries the sine, in-plane projector the cosine.
    antisymmetric = torch.outer(n_2, n_1) - torch.outer(n_1, n_2)
    in_plane = torch.outer(n_1, n_1) + torch.outer(n_2, n_2)

    rotation = torch.eye(dim)
    rotation = rotation + antisymmetric * torch.sin(theta)
    rotation = rotation + in_plane * (torch.cos(theta) - 1)
    return rotation