def test_addbmm(self):
    """Compare torch.addbmm under manual bf16 inputs and auto mixed precision.

    For each (alpha, beta) pair taken from {0.8, 1.0} the test asserts that:
      * the manual-bf16 inputs yield a ``torch.bfloat16`` result;
      * the auto-mix result reports ``torch.float`` as dtype while being
        recognised by ``ipex.core.is_bf16_dil_tensor``;
      * both results agree once the manual one is converted to float.
    """
    seed = int(get_rand_seed())
    print("{} rand sed: {}".format(sys._getframe().f_code.co_name, seed))
    torch.manual_seed(seed)

    batch_count = 10
    for alpha_tenths in range(8, 12, 2):
        for beta_tenths in range(8, 12, 2):
            alpha = alpha_tenths / 10
            beta = beta_tenths / 10
            (auto_a, auto_b, auto_add,
             bf16_a, bf16_b, bf16_add) = self._gen_mm_tensor(seed, batch_count)

            with AutoDNNL(True), AutoMixPrecision(False):
                manual_out = torch.addbmm(
                    bf16_add, bf16_a, bf16_b, beta=beta, alpha=alpha)
                self.assertEqual(manual_out.dtype, torch.bfloat16)

                # NOTE(review): kept inside the outer context so AutoDNNL
                # stays enabled for the auto-mix run -- confirm the nesting.
                with AutoMixPrecision(True):
                    auto_out = torch.addbmm(
                        auto_add, auto_a, auto_b, beta=beta, alpha=alpha)
                    self.assertEqual(auto_out.dtype, torch.float)
                    self.assertTrue(ipex.core.is_bf16_dil_tensor(auto_out))
                    self.assertEqual(auto_out, manual_out.float())
def _worker_gradient_descent(self, gidx):
    """Accumulate gradients of the current bond on the GPU slot ``gidx``.

    The per-GPU batch is processed in chunks of ``self.chunk_size``. For
    every chunk and every (i, j) in {0, 1} x {0, 1}, the selected left and
    right cumulant vectors are combined with ``self.merged_tensor_BC`` to
    obtain ``psis``; the left vectors are divided by ``psis`` and the
    batched products are summed into ``self.gradients[gidx][:, i, j, :]``
    in place. One item is taken from ``self._job_queue`` when done.

    Args:
        gidx: index into ``self._gpu_ids`` and the per-GPU containers.
    """
    device_id = self._gpu_ids[gidx]
    site_selector = self.idx_2sites[gidx][
        self.current_bond, :, :,
        self.batch_head:self.batch_head + self.batch_size_gpu]

    # The first chunk end is deliberately NOT clamped to batch_size_gpu;
    # only later chunk ends are.
    start, stop = 0, self.chunk_size
    while start < self.batch_size_gpu:
        left_cumulants = self.cumulants[self.current_bond][
            gidx, start:stop, :, :].cuda(device_id)
        right_cumulants = self.cumulants[self.current_bond + 1][
            gidx, start:stop, :, :].cuda(device_id)
        positions = torch.arange(stop - start).type(
            torch.LongTensor).cuda(device_id)

        for i in range(2):
            for j in range(2):
                # presumably the selector is a mask over the chunk, turning
                # ``positions`` into sample indices -- verify against caller
                idx = positions[site_selector[i, j, start:stop]]
                if idx.numel() == 0:
                    continue
                lvecs = left_cumulants[idx]
                rvecs = right_cumulants[idx]
                core = self.merged_tensor_BC[gidx][:, i, j, :].repeat(
                    idx.numel(), 1, 1)
                psis = lvecs @ core @ rvecs
                # this is to resolve the zero psis issue, for the float-storage
                psis += self.epsi
                lvecs /= psis
                grad_slot = self.gradients[gidx][:, i, j, :]
                torch.addbmm(grad_slot,
                             lvecs.permute(0, 2, 1),
                             rvecs.permute(0, 2, 1),
                             out=self.gradients[gidx][:, i, j, :])

        start = stop
        stop = min(self.batch_size_gpu, stop + self.chunk_size)

    self._job_queue.get()
def _compute_posterior_means_and_covariances(self, n_all, f_all, batch_size,
                                             component_batches):
    """Return posterior means and covariances for a batch.

    Component slices ``(bstart, bend)`` from ``component_batches`` are
    processed one at a time; each contributes ``n * (T S T^T)`` to the
    precision accumulator and ``T S f`` to the linear term, where ``T`` is
    ``self.t_matrix`` and ``S`` is ``self.inv_covariances``. Afterwards
    ``self.identity`` is added, the result is inverted, and
    ``self.prior_offset`` is added to the first coordinate of the linear
    term before it is multiplied by the covariances.

    Args:
        n_all: indexed as ``n_all[:, bstart:bend]``
            (assumes (batch, components) -- TODO confirm).
        f_all: indexed as ``f_all[bstart:bend, :, :]``
            (assumes (components, feat_dim, batch) -- TODO confirm).
        batch_size: size of the batch axis of the accumulators.
        component_batches: iterable of ``(bstart, bend)`` component ranges.

    Returns:
        Tuple ``(means, covariances)``; ``means`` is viewed with the first
        two sizes of the batched product, ``covariances`` is the inverse.
    """
    device = self.t_matrix.device
    dim = self.ivec_dim
    precision_acc = torch.zeros(dim, batch_size, dim, device=device)
    linear_acc = torch.zeros(dim, batch_size, device=device)

    for lo, hi in component_batches:
        counts = n_all[:, lo:hi]
        first_order = f_all[lo:hi, :, :]
        t_block = self.t_matrix[lo:hi, :, :]
        t_sigma_inv = torch.bmm(t_block, self.inv_covariances[lo:hi, :, :])
        quad = torch.bmm(t_sigma_inv, torch.transpose(t_block, 1, 2))
        # Put the component axis in the middle so matmul contracts over it.
        precision_acc += torch.matmul(counts, quad.transpose(0, 1))
        linear_acc = torch.addbmm(linear_acc, t_sigma_inv, first_order)

    precision = precision_acc.transpose(0, 1)
    precision += self.identity
    covariances = torch.inverse(precision)

    means = linear_acc.t()
    means[:, 0] += self.prior_offset
    means = torch.bmm(covariances, means.unsqueeze(2))
    return means.view(means.size()[:2]), covariances
def forward(ctx, add_matrix, batch1, batch2, alpha=1, beta=1, inplace=False):
    """Run addbmm and stash on ``ctx`` what the backward pass reads.

    Args:
        ctx: context object; receives ``alpha``, ``beta``,
            ``add_matrix_size`` and the saved ``batch1``/``batch2``.
        add_matrix: matrix passed to ``torch.addbmm`` alongside the scalars.
        batch1, batch2: batched operands.
        alpha, beta: scalars forwarded positionally to ``torch.addbmm``.
        inplace: forwarded to ``_get_output`` to choose the output buffer.

    Returns:
        The result of ``torch.addbmm`` written into the chosen buffer.
    """
    ctx.alpha, ctx.beta = alpha, beta
    ctx.add_matrix_size = add_matrix.size()
    ctx.save_for_backward(batch1, batch2)
    result_buffer = _get_output(ctx, add_matrix, inplace=inplace)
    # NOTE(review): this is the positional-scalar addbmm overload; the first
    # scalar presumably scales add_matrix and the second the batch product --
    # confirm against the targeted torch version.
    return torch.addbmm(alpha, add_matrix, beta, batch1, batch2,
                        out=result_buffer)
def forward(self, x, y, z, m):
    """Exercise addbmm, baddbmm and bmm in method and function form.

    Args:
        x: matrix added by the two ``addbmm`` calls.
        y, z: batched operands shared by every call.
        m: batched tensor added by the two ``baddbmm`` calls.

    Returns:
        Tuple of five results: ``x.addbmm``, ``torch.addbmm``,
        ``m.baddbmm``, ``torch.baddbmm`` (all with beta=2, alpha=3) and
        ``torch.bmm(y, z)``.
    """
    addbmm_method = x.addbmm(y, z, beta=2, alpha=3)
    addbmm_function = torch.addbmm(x, y, z, beta=2, alpha=3)
    # The in-place variant is left disabled:
    #   x.addbmm_(y, z, beta=2, alpha=3)
    baddbmm_method = m.baddbmm(y, z, beta=2, alpha=3)
    baddbmm_function = torch.baddbmm(m, y, z, beta=2, alpha=3)
    # deterministic is not supported by jit
    plain_bmm = torch.bmm(y, z)
    return (addbmm_method, addbmm_function, baddbmm_method,
            baddbmm_function, plain_bmm)
def forward(self, add_matrix, batch1, batch2):
    """Save the batched operands and run addbmm into the output buffer.

    Args:
        add_matrix: matrix passed to ``torch.addbmm`` alongside the scalars.
        batch1, batch2: batched operands; both are saved for backward.

    Returns:
        The result of ``torch.addbmm`` written into the buffer obtained
        from ``self._get_output(add_matrix)``.
    """
    self.save_for_backward(batch1, batch2)
    result_buffer = self._get_output(add_matrix)
    # NOTE(review): positional-scalar addbmm overload; the first scalar
    # presumably scales add_matrix and the second the batch product --
    # confirm against the targeted torch version.
    return torch.addbmm(self.alpha, add_matrix, self.beta, batch1, batch2,
                        out=result_buffer)
def get_cameras_accuracy(
    pred_Rs,
    gt_Rs,
    pred_ts,
    gt_ts,
):
    """Align predicted camera poses to the ground truth and print the errors.

    The rotation alignment comes from an SVD of the summed products
    ``gt_R @ pred_R^T``. Scale and translation come from a cvxpy problem
    that minimises the sum of per-camera residual norms.

    Args:
        pred_Rs, gt_Rs: batched rotation matrices
            (assumes (n, d, d) double tensors -- TODO confirm).
        pred_ts, gt_ts: batched positions
            (assumes (n, d) CPU tensors -- TODO confirm).

    Returns:
        Tuple ``(R_opt, t_opt, c_opt, R_fixed, t_fixed)``: the aligning
        rotation, translation and scale, followed by the aligned rotations
        and aligned positions (NumPy arrays / cvxpy values).
    """
    dim = pred_Rs.shape[-1]
    num_cams = pred_Rs.shape[0]

    # --- rotation ---------------------------------------------------------
    correlation = torch.addbmm(torch.zeros(dim, dim, dtype=torch.double),
                               gt_Rs, pred_Rs.transpose(1, 2))
    left, _, right = torch.svd(correlation)
    weights = torch.ones(dim, dtype=torch.double)
    # Last weight carries det(U V^T), presumably so a reflection is flipped
    # back into a proper rotation.
    weights[-1] = torch.det(left @ right.transpose(0, 1))
    R_opt = left @ torch.diag(weights) @ right.transpose(0, 1)
    R_fixed = torch.bmm(R_opt.repeat(num_cams, 1, 1), pred_Rs)

    # --- translation and scale --------------------------------------------
    pred_ts = pred_ts @ R_opt.transpose(0, 1)
    rotated_np = pred_ts.numpy()
    gt_np = gt_ts.numpy()
    ones_col = np.ones((num_cams, 1), dtype=np.double)

    c_opt = cp.Variable()
    t_opt = cp.Variable((1, dim))
    residual = gt_np - (c_opt * rotated_np + ones_col @ t_opt)
    objective = cp.Minimize(cp.sum(cp.norm(residual, axis=1)))
    cp.Problem(objective, []).solve()
    t_fixed = c_opt.value * rotated_np + ones_col * t_opt.value

    # --- translation error ------------------------------------------------
    t_error = np.linalg.norm(t_fixed - gt_np, axis=-1)
    t_error_mean = np.mean(t_error)
    t_error_medi = np.median(t_error)

    # --- rotation error ---------------------------------------------------
    R_error = compare_rotations(R_fixed, gt_Rs).numpy()
    R_error_mean = np.mean(R_error)
    R_error_medi = np.median(R_error)

    print(
        'CAMERAS EVALUATION: R error mean = {0} ; t error mean = {1} ; R error median = {2} ; t error median = {3}'
        .format("%.2f" % R_error_mean, "%.2f" % t_error_mean,
                "%.2f" % R_error_medi, "%.2f" % t_error_medi))

    # alignment first, aligned pose second
    return R_opt.numpy(), t_opt.value, c_opt.value, R_fixed.numpy(), t_fixed
def blas_lapack_ops(self):
    """Call a list of torch BLAS/LAPACK-style ops and return all results.

    Every op runs on small random tensors created here; entries that are
    commented out are intentionally not executed.

    NOTE(review): several entries (e.g. torch.eig, torch.lstsq, torch.solve,
    torch.chain_matmul, torch.ger, torch.lu) are deprecated or removed in
    newer PyTorch releases -- confirm the targeted version before editing.

    Returns:
        Tuple with one element per executed op, in the order listed.
    """
    m = torch.randn(3, 3)  # square matrix
    a = torch.randn(10, 3, 4)  # batch of 10 matrices, 3x4
    b = torch.randn(10, 4, 3)  # batch of 10 matrices, 4x3
    v = torch.randn(3)  # vector
    return (
        torch.addbmm(m, a, b),
        torch.addmm(torch.randn(2, 3), torch.randn(2, 3), torch.randn(3, 3)),
        torch.addmv(torch.randn(2), torch.randn(2, 3), torch.randn(3)),
        torch.addr(torch.zeros(3, 3), v, v),
        torch.baddbmm(m, a, b),
        torch.bmm(a, b),
        torch.chain_matmul(torch.randn(3, 3), torch.randn(3, 3), torch.randn(3, 3)),
        # torch.cholesky(a), # deprecated
        torch.cholesky_inverse(torch.randn(3, 3)),
        torch.cholesky_solve(torch.randn(3, 3), torch.randn(3, 3)),
        torch.dot(v, v),
        torch.eig(m),
        torch.geqrf(a),
        torch.ger(v, v),
        torch.inner(m, m),
        torch.inverse(m),
        torch.det(m),
        torch.logdet(m),
        torch.slogdet(m),
        torch.lstsq(m, m),
        torch.lu(m),
        torch.lu_solve(m, *torch.lu(m)),
        torch.lu_unpack(*torch.lu(m)),
        torch.matmul(m, m),
        torch.matrix_power(m, 2),
        # torch.matrix_rank(m),
        torch.matrix_exp(m),
        torch.mm(m, m),
        torch.mv(m, v),
        # torch.orgqr(a, m),
        # torch.ormqr(a, m, v),
        torch.outer(v, v),
        torch.pinverse(m),
        # torch.qr(a),
        torch.solve(m, m),
        torch.svd(a),
        # torch.svd_lowrank(a),
        # torch.pca_lowrank(a),
        # torch.symeig(a), # deprecated
        # torch.lobpcg(a, b), # not supported
        torch.trapz(m, m),
        torch.trapezoid(m, m),
        torch.cumulative_trapezoid(m, m),
        # torch.triangular_solve(m, m),
        torch.vdot(v, v),
    )
def nn_riemannic_metric(K, W, b, need_to_normalize_weights=False):
    """Propagate a Riemannian metric through one layer.

    Only similarity between filters of different channels is used: for a
    4D weight the per-location products are summed over the ``h * w``
    kernel positions instead of tracking every spatial location.

    Args:
        K: metric of the previous layer, or None to skip the
            multiplication by K.
        W: weight tensor whose shape (as reported by ``cg.get_shape``) has
            length 4 or 2.
        b: bias vector, or None; when given, its outer product with itself
            is added to the result.
        need_to_normalize_weights: currently unused.

    Returns:
        The resulting metric with non-positive entries zeroed.

    Raises:
        ValueError: if the shape of ``W`` has neither 2 nor 4 entries.
    """
    shape = cg.get_shape(W)
    rank = len(shape)
    if rank not in (2, 4):
        raise ValueError("Shape {} is not supported".format(shape))

    if rank == 4:
        c_out, c_in, h, w = shape
        # One (c_out, c_in) matrix per kernel position.
        kernels = W.permute(2, 3, 0, 1).contiguous().view(h * w, c_out, c_in)
        kernels_t = th.transpose(kernels, 1, 2)
        # sum over the h * w positions of (W_i K W_i^T)
        left = kernels if K is None else th.matmul(kernels, K)
        if b is None:
            K_out = th.sum(th.bmm(left, kernels_t), dim=0)
        else:
            K_out = th.addbmm(th.ger(b, b), left, kernels_t)
    else:
        right = W if K is None else th.matmul(K, W)
        if b is None:
            K_out = th.mm(W.t(), right)
        else:
            K_out = th.addmm(th.ger(b, b), W.t(), right)

    # Keep strictly positive entries, zero the rest.
    return K_out * (K_out > 0).float()
def distdot_inner(X_mus, X_sigs, X_alphas, Y_mus, Y_sigs, Y_alphas):
    """Compute a per-sample energy between two sets of weighted components.

    The accumulator for ``torch.addbmm`` is allocated with
    ``X_sigs.new_zeros`` so it follows the device and dtype of the inputs
    instead of being pinned to the default CUDA device.

    Args:
        X_mus, Y_mus: tensors whose size unpacks to ``(bs, K, D)``.
        X_sigs, Y_sigs: batched tensors such that
            ``bmm(X_sigs, Y_sigs^T)`` is ``(bs, K, K)``
            (assumes (bs, K, S) -- TODO confirm).
        X_alphas, Y_alphas: component weights
            (assumes (bs, K) -- TODO confirm).

    Returns:
        Tensor of ``bs`` energies: the weighted exponentials summed over
        all ``K * K`` component pairs.
    """
    bs, K, D = X_mus.size()

    # (X_mus - Y_mus) (X_mus - Y_mus)^T, one K x K matrix per sample
    X_minus_Y = X_mus - Y_mus
    XY_dot = torch.bmm(X_minus_Y, X_minus_Y.permute(0, 2, 1))

    # X_sigs @ Y_sigs^T summed over the batch -> a single K x K matrix.
    # NOTE(review): the previous comment described this as
    # "X_sigs + Y_sigs"; the code computes a product summed over the batch
    # -- confirm which one is intended.
    zero_mat = X_sigs.new_zeros(K, K)
    XY_sig = torch.addbmm(zero_mat, X_sigs, Y_sigs.permute(0, 2, 1))

    # xi: bs x K x K (XY_sig broadcasts over the batch axis)
    partial_energies = (-0.5 * D * torch.log(XY_sig)
                        - 0.5 * D * np.log(np.pi)
                        - 0.5 * (1. / XY_sig) * XY_dot)
    partial_energies = torch.exp(partial_energies)

    # weight every component pair and sum
    XY_alphs = torch.bmm(X_alphas.unsqueeze(2), Y_alphas.unsqueeze(1))
    energy = XY_alphs * partial_energies
    energy = torch.sum(energy.view(bs, K * K), 1)
    return energy
def test_addbmm(self):
    """Check that torch.addbmm gives equal results on CPU and on ``device``.

    For every (alpha, beta) pair taken from {0.8, 1.0}, fresh random
    float32 operands are created on CPU and copied to ``device``. Both the
    functional form and the ``out=`` form are compared.
    """
    ipex.enable_auto_dnnl()
    seed = int(get_rand_seed())
    print("{} rand sed: {}".format(sys._getframe().f_code.co_name, seed))
    torch.manual_seed(seed)

    batch_count = 10
    rows, inner, cols = 23, 8, 12
    for alpha_tenths in range(8, 12, 2):
        for beta_tenths in range(8, 12, 2):
            scales = {"beta": beta_tenths / 10, "alpha": alpha_tenths / 10}

            lhs_cpu = torch.randn(batch_count, rows, inner,
                                  dtype=torch.float32)
            rhs_cpu = torch.randn(batch_count, inner, cols,
                                  dtype=torch.float32)
            add_cpu = torch.randn(rows, cols, dtype=torch.float32)
            lhs_dev = lhs_cpu.to(device=device)
            rhs_dev = rhs_cpu.to(device=device)
            add_dev = add_cpu.to(device=device)

            # Functional form.
            expected = torch.addbmm(add_cpu, lhs_cpu, rhs_cpu, **scales)
            actual = torch.addbmm(add_dev, lhs_dev, rhs_dev, **scales)
            self.assertEqual(expected, actual)

            # ``out=`` form, writing into pre-filled random buffers.
            out_cpu = torch.randn(rows, cols, dtype=torch.float32)
            out_dev = out_cpu.to(device=device)
            torch.addbmm(add_cpu, lhs_cpu, rhs_cpu, out=out_cpu, **scales)
            torch.addbmm(add_dev, lhs_dev, rhs_dev, out=out_dev, **scales)
            self.assertEqual(out_cpu, out_dev)
def forward(self, input):
    """Multiply ``input`` by ``self.weight`` batch-wise and sum over the batch.

    Args:
        input: batched left operand for ``torch.addbmm``.

    Returns:
        ``sum_b input[b] @ self.weight[b]``, plus ``self.bias`` when a bias
        is set.
    """
    if self.bias is None:
        # beta=0 makes addbmm ignore the placeholder matrix. The keyword
        # form replaces the deprecated positional-scalar overload
        # addbmm(0, zero_bias, 1, ...).
        return torch.addbmm(self.zero_bias, input, self.weight, beta=0)
    return torch.addbmm(self.bias, input, self.weight)
import torch
import numpy as np

if __name__ == '__main__':
    # addbmm: beta * matrix + alpha * (sum over the batch of lhs[b] @ rhs[b])
    matrix = torch.randn(3, 5)
    lhs = torch.randn(10, 3, 4)
    rhs = torch.randn(10, 4, 5)
    print(torch.addbmm(matrix, lhs, rhs))

    # baddbmm keeps the batch axis: the result is torch.Size([10, 3, 5])
    batched_matrix = torch.randn(10, 3, 5)
    print(torch.baddbmm(batched_matrix, lhs, rhs).shape)

    # bmm: plain batched product lhs[b] @ rhs[b]
    print(torch.bmm(lhs, rhs).shape)
def forward(self):
    """Return ``torch.addbmm`` of the three tensors stored on the instance.

    Returns:
        ``self.input_one`` plus the batch-summed product of ``self.batch1``
        and ``self.batch2``.
    """
    addend = self.input_one
    lhs, rhs = self.batch1, self.batch2
    return torch.addbmm(addend, lhs, rhs)
r = torch.ger(v1, v2) # Add M with outer product of 2 vectors # Size 3x2 vec1 = torch.arange(1, 4) # Size 3 vec2 = torch.arange(1, 3) # Size 2 M = torch.zeros(3, 2) r = torch.addr(M, vec1, vec2) # Batch Matrix x Matrix # Size 10x3x5 batch1 = torch.randn(10, 3, 4) batch2 = torch.randn(10, 4, 5) r = torch.bmm(batch1, batch2) # Batch Matrix + Matrix x Matrix # Performs a batch matrix-matrix product # 3x4 + (5x3x4 X 5x4x2 ) -> 5x3x2 M = torch.randn(3, 2) batch1 = torch.randn(5, 3, 4) batch2 = torch.randn(5, 4, 2) r = torch.addbmm(M, batch1, batch2) # Move Tensors to GPU if torch.cuda.is_available(): x = x.cuda() y = y.cuda() x + y print(r)
def _update(self, x, y):
    """Update ``self.permanence`` from the batch-summed outer products of y and x.

    The zero accumulator for ``torch.addbmm`` is allocated with
    ``y.new_zeros`` so it follows the device and dtype of the inputs
    instead of being pinned to the default CUDA device.

    Args:
        x: tensor unsqueezed at dim 1
            (assumes (batch, n) -- TODO confirm).
        y: tensor unsqueezed at dim 2
            (assumes (batch, m) -- TODO confirm).
    """
    x = x.unsqueeze(1)
    y = y.unsqueeze(2)
    # The single-element zero tensor broadcasts to the shape of the
    # product, so ``update`` is just sum_b y[b] @ x[b].
    update = torch.addbmm(y.new_zeros(1), y, x)
    # Scale by the learning rate and the raw weights, then keep the
    # permanence within [-1, 1].
    self.permanence.data = (
        self.permanence.data
        + self.lr * update.data * self.module.weight_raw.data
    ).clamp_(-1, 1)