def test_addbmm(self):
     rand_seed = int(get_rand_seed())
     print("{} rand sed: {}".format(sys._getframe().f_code.co_name,
                                    rand_seed))
     torch.manual_seed(rand_seed)
     for i in range(8, 12, 2):
         for j in range(8, 12, 2):
             alpha = i / 10
             beta = j / 10
             num_batches = 10
             x_auto_mix_a, x_auto_mix_b, add_auto_mix, x_man_bf16_a, x_man_bf16_b, add_man_bf16 = self._gen_mm_tensor(
                 rand_seed, num_batches)
             with AutoDNNL(True), AutoMixPrecision(False):
                 res_man_bf16 = torch.addbmm(add_man_bf16,
                                             x_man_bf16_a,
                                             x_man_bf16_b,
                                             beta=beta,
                                             alpha=alpha)
                 self.assertEqual(res_man_bf16.dtype, torch.bfloat16)
                 with AutoMixPrecision(True):
                     res_auto_mix = torch.addbmm(add_auto_mix,
                                                 x_auto_mix_a,
                                                 x_auto_mix_b,
                                                 beta=beta,
                                                 alpha=alpha)
                     self.assertEqual(res_auto_mix.dtype, torch.float)
                     self.assertTrue(
                         ipex.core.is_bf16_dil_tensor(res_auto_mix))
                     self.assertEqual(res_auto_mix, res_man_bf16.float())
 def _worker_gradient_descent(self, gidx):
     gpuid = self._gpu_ids[gidx]
     chunk_begin, chunk_end = (0, self.chunk_size)
     local_idx_2sites_batch = self.idx_2sites[gidx][
         self.current_bond, :, :,
         self.batch_head:self.batch_head + self.batch_size_gpu]
     while chunk_begin < self.batch_size_gpu:
         cumu1 = self.cumulants[self.current_bond][
             gidx, chunk_begin:chunk_end, :, :].cuda(gpuid)
         cumu2 = self.cumulants[self.current_bond +
                                1][gidx,
                                   chunk_begin:chunk_end, :, :].cuda(gpuid)
         converter = torch.arange(chunk_end - chunk_begin).type(
             torch.LongTensor).cuda(gpuid)
         for i, j in [(i, j) for i in range(2) for j in range(2)]:
             idx = converter[local_idx_2sites_batch[i, j,
                                                    chunk_begin:chunk_end]]
              if idx.numel() == 0:
                 continue
             lvecs = cumu1[idx]
             rvecs = cumu2[idx]
             psis = lvecs @ self.merged_tensor_BC[gidx][:, i, j, :].repeat(
                 idx.numel(), 1, 1) @ rvecs
          psis += self.epsi  # epsilon guard: avoids zero psis from float-precision underflow
             lvecs /= psis
             torch.addbmm(self.gradients[gidx][:, i, j, :],
                          lvecs.permute(0, 2, 1),
                          rvecs.permute(0, 2, 1),
                          out=self.gradients[gidx][:, i, j, :])
         chunk_begin = chunk_end
         chunk_end = min(self.batch_size_gpu, chunk_end + self.chunk_size)
     self._job_queue.get()
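
# A minimal standalone sketch (shapes are illustrative, not taken from the
# code above) of the accumulation pattern in the loop: torch.addbmm with out=
# aliased to its input matrix adds the sum of batched matrix products into
# the gradient buffer. The documented in-place spelling is Tensor.addbmm_:
import torch

grad = torch.zeros(4, 4)       # accumulator, shape (n, m)
lvecs = torch.randn(10, 4, 1)  # batch of column vectors
rvecs = torch.randn(10, 1, 4)  # batch of row vectors
grad.addbmm_(lvecs, rvecs)     # grad += sum_b lvecs[b] @ rvecs[b]
assert torch.allclose(grad, (lvecs @ rvecs).sum(dim=0), atol=1e-6)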
Example #3
 def _compute_posterior_means_and_covariances(self, n_all, f_all,
                                              batch_size,
                                              component_batches):
     covariances = torch.zeros(self.ivec_dim,
                               batch_size,
                               self.ivec_dim,
                               device=self.t_matrix.device)
     means = torch.zeros(self.ivec_dim,
                         batch_size,
                         device=self.t_matrix.device)
     for bstart, bend in component_batches:
         n = n_all[:, bstart:bend]
         f = f_all[bstart:bend, :, :]
         sub_t = self.t_matrix[bstart:bend, :, :]
         sub_inv_covars = self.inv_covariances[bstart:bend, :, :]
         sub_tc = torch.bmm(sub_t, sub_inv_covars)
         tt = torch.bmm(sub_tc, torch.transpose(sub_t, 1, 2))
         tt.transpose_(0, 1)
         covariances += torch.matmul(n, tt)
         means = torch.addbmm(means, sub_tc, f)
     covariances.transpose_(0, 1)
     covariances += self.identity
     covariances = torch.inverse(covariances)
     means.t_()
     means[:, 0] += self.prior_offset
     means.unsqueeze_(2)
     means = torch.bmm(covariances, means)
     means = means.view((means.size()[:2]))
     return means, covariances
Example #4
 def forward(ctx, add_matrix, batch1, batch2, alpha=1, beta=1, inplace=False):
     ctx.alpha = alpha
     ctx.beta = beta
     ctx.add_matrix_size = add_matrix.size()
     ctx.save_for_backward(batch1, batch2)
     output = _get_output(ctx, add_matrix, inplace=inplace)
     return torch.addbmm(alpha, add_matrix, beta,
                         batch1, batch2, out=output)
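# Note: this is the legacy positional-scalar form torch.addbmm(beta, mat,
# alpha, batch1, batch2), which has been removed from PyTorch; the modern
# call is torch.addbmm(mat, batch1, batch2, beta=..., alpha=...). Here
# `alpha` occupies the positional beta slot, so it scales add_matrix.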
Example #6
 def forward(self, x, y, z, m):
     out1 = x.addbmm(y, z, beta=2, alpha=3)
     out2 = torch.addbmm(x, y, z, beta=2, alpha=3)
     #out3 = x.addbmm_(y, z, beta=2, alpha=3)
     out3 = m.baddbmm(y, z, beta=2, alpha=3)
     out4 = torch.baddbmm(m, y, z, beta=2, alpha=3)
     out5 = torch.bmm(y, z) # deterministic is not supported by jit
     return out1, out2, out3, out4, out5
Example #7
 def forward(self, add_matrix, batch1, batch2):
     self.save_for_backward(batch1, batch2)
     output = self._get_output(add_matrix)
     return torch.addbmm(self.alpha,
                         add_matrix,
                         self.beta,
                         batch1,
                         batch2,
                         out=output)
Example #8
def get_cameras_accuracy(pred_Rs, gt_Rs, pred_ts, gt_ts):
    '''Align predicted poses to the ground-truth poses and report camera accuracy.'''

    # find rotation
    d = pred_Rs.shape[-1]
    n = pred_Rs.shape[0]

    Q = torch.addbmm(torch.zeros(d, d, dtype=torch.double), gt_Rs,
                     pred_Rs.transpose(1, 2))
    Uq, _, Vq = torch.svd(Q)
    sv = torch.ones(d, dtype=torch.double)
    sv[-1] = torch.det(Uq @ Vq.transpose(0, 1))
    R_opt = Uq @ torch.diag(sv) @ Vq.transpose(0, 1)
    R_fixed = torch.bmm(R_opt.repeat(n, 1, 1), pred_Rs)

    # find translation
    pred_ts = pred_ts @ R_opt.transpose(0, 1)
    c_opt = cp.Variable()
    t_opt = cp.Variable((1, d))

    constraints = []
    obj = cp.Minimize(
        cp.sum(
            cp.norm(gt_ts.numpy() - (c_opt * pred_ts.numpy() + np.ones(
                (n, 1), dtype=np.double) @ t_opt),
                    axis=1)))
    prob = cp.Problem(obj, constraints)
    prob.solve()
    t_fixed = c_opt.value * pred_ts.numpy() + np.ones(
        (n, 1), dtype=np.double) * t_opt.value

    # Calculate translation error
    t_error = np.linalg.norm(t_fixed - gt_ts.numpy(), axis=-1)
    t_error_mean = np.mean(t_error)
    t_error_medi = np.median(t_error)

    # Calculate rotation error
    R_error = compare_rotations(R_fixed, gt_Rs)

    R_error = R_error.numpy()
    R_error_mean = np.mean(R_error)
    R_error_medi = np.median(R_error)

    print(
        'CAMERAS EVALUATION: R error mean = {0} ; t error mean = {1} ; R error median = {2} ; t error median = {3}'
        .format("%.2f" % R_error_mean, "%.2f" % t_error_mean,
                "%.2f" % R_error_medi, "%.2f" % t_error_medi))

    # return alignment and aligned pose
    return R_opt.numpy(), t_opt.value, c_opt.value, R_fixed.numpy(), t_fixed
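
# A minimal sketch (random inputs, assumed shapes) of the rotation-alignment
# step above: addbmm builds Q = sum_i gt_Rs[i] @ pred_Rs[i]^T in one call,
# and an orthogonal-Procrustes SVD then recovers the aligning rotation.
import torch

n, d = 8, 3
gt_Rs = torch.linalg.qr(torch.randn(n, d, d, dtype=torch.double))[0]
pred_Rs = torch.linalg.qr(torch.randn(n, d, d, dtype=torch.double))[0]
Q = torch.addbmm(torch.zeros(d, d, dtype=torch.double),
                 gt_Rs, pred_Rs.transpose(1, 2))
U, _, Vh = torch.linalg.svd(Q)
R_opt = U @ Vh  # closest orthogonal matrix to Q (the det-sign fix is omitted)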
Example #9
 def blas_lapack_ops(self):
     m = torch.randn(3, 3)
     a = torch.randn(10, 3, 4)
     b = torch.randn(10, 4, 3)
     v = torch.randn(3)
     return (
         torch.addbmm(m, a, b),
         torch.addmm(torch.randn(2, 3), torch.randn(2, 3),
                     torch.randn(3, 3)),
         torch.addmv(torch.randn(2), torch.randn(2, 3), torch.randn(3)),
         torch.addr(torch.zeros(3, 3), v, v),
         torch.baddbmm(m, a, b),
         torch.bmm(a, b),
         torch.chain_matmul(torch.randn(3, 3), torch.randn(3, 3),
                            torch.randn(3, 3)),
         # torch.cholesky(a), # deprecated
         torch.cholesky_inverse(torch.randn(3, 3)),
         torch.cholesky_solve(torch.randn(3, 3), torch.randn(3, 3)),
         torch.dot(v, v),
         torch.eig(m),
         torch.geqrf(a),
         torch.ger(v, v),
         torch.inner(m, m),
         torch.inverse(m),
         torch.det(m),
         torch.logdet(m),
         torch.slogdet(m),
         torch.lstsq(m, m),
         torch.lu(m),
         torch.lu_solve(m, *torch.lu(m)),
         torch.lu_unpack(*torch.lu(m)),
         torch.matmul(m, m),
         torch.matrix_power(m, 2),
         # torch.matrix_rank(m),
         torch.matrix_exp(m),
         torch.mm(m, m),
         torch.mv(m, v),
         # torch.orgqr(a, m),
         # torch.ormqr(a, m, v),
         torch.outer(v, v),
         torch.pinverse(m),
         # torch.qr(a),
         torch.solve(m, m),
         torch.svd(a),
         # torch.svd_lowrank(a),
         # torch.pca_lowrank(a),
         # torch.symeig(a), # deprecated
         # torch.lobpcg(a, b), # not supported
         torch.trapz(m, m),
         torch.trapezoid(m, m),
         torch.cumulative_trapezoid(m, m),
         # torch.triangular_solve(m, m),
         torch.vdot(v, v),
     )
Example #10
def nn_riemannic_metric(K, W, b, need_to_normalize_weights=False):
    """
    Given Riemannian metric of the previous layer, compute that of this layer.

    Because computing similarity between group-action-generated filters would
    require backtracking through the similarity of every spatial location in
    the image, which is not feasible, only the similarity between filters of
    different channels is used.

    Args:
        K: the Riemannian metric
        W: the weight matrix, 4D or 2D, or None (meaning the identity matrix)
        b: the bias vector
    """
    shape = cg.get_shape(W)

    if len(shape) == 4:
        c_out, c_in, h, w = shape
        W = W.permute(2, 3, 0, 1).contiguous()
        W = W.view(h * w, c_out, c_in)
        W_T = th.transpose(W, 1, 2)
        # Compute sum_{H * W}(WKW^{T}_{i})
        if K is not None:
            left = th.matmul(W, K)
        else:
            left = W
        if b is not None:
            mat = th.ger(b, b)  # Outer product
            K_out = th.addbmm(mat, left, W_T)
        else:
            K_out = th.sum(th.bmm(left, W_T), dim=0)

    elif len(shape) == 2:
        if K is not None:
            right = th.matmul(K, W)
        else:
            right = W

        if b is not None:
            mat = th.ger(b, b)  # Outer product
            K_out = th.addmm(mat, W.t(), right)
        else:
            K_out = th.mm(W.t(), right)
    else:
        raise ValueError("Shape {} is not supported".format(shape))

    K_out = K_out * (K_out > 0).float()  # ReLU: keep only positive entries

    return K_out
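
# A minimal sketch (illustrative shapes, independent of the cg/th helpers
# above) of the 4D branch: the metric update K_out = b b^T + sum_{h*w} W_s K
# W_s^T is exactly one torch.addbmm call over the spatial batch dimension.
import torch

c_out, c_in, h, w = 6, 4, 3, 3
K = torch.randn(c_in, c_in)
W = torch.randn(c_out, c_in, h, w).permute(2, 3, 0, 1).reshape(h * w, c_out, c_in)
b = torch.randn(c_out)
K_out = torch.addbmm(torch.outer(b, b), W @ K, W.transpose(1, 2))
assert K_out.shape == (c_out, c_out)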
Example #11
def distdot_inner(X_mus, X_sigs, X_alphas, Y_mus, Y_sigs, Y_alphas):
    bs, K, D = X_mus.size()
    # calculate (X_mus - Y_mus)^T (X_mus - Y_mus)
    X_minus_Y = X_mus - Y_mus
    XY_dot = torch.bmm(X_minus_Y, X_minus_Y.permute(0, 2, 1))

    # calculate X_sigs + Y_sigs
    zero_mat = Variable(torch.zeros(K, K).cuda())
    XY_sig = torch.addbmm(zero_mat, X_sigs, Y_sigs.permute(0, 2, 1))

    # calculate xi: bs x K x K
    partial_energies = -0.5 * D * torch.log(XY_sig) - 0.5 * D * np.log(
        np.pi) - 0.5 * (1. / XY_sig) * XY_dot
    partial_energies = torch.exp(partial_energies)

    # calculate energy
    XY_alphs = torch.bmm(X_alphas.unsqueeze(2), Y_alphas.unsqueeze(1))
    energy = XY_alphs * partial_energies
    energy = torch.sum(energy.view(bs, K * K), 1)
    return energy
Example #12
    def test_addbmm(self):
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        print("{} rand sed: {}".format(sys._getframe().f_code.co_name,
                                       rand_seed))
        torch.manual_seed(rand_seed)
        for i in range(8, 12, 2):
            for j in range(8, 12, 2):
                alpha = i / 10
                beta = j / 10
                num_batches = 10
                M, N, O = 23, 8, 12
                b1_cpu = torch.randn(num_batches, M, N, dtype=torch.float32)
                b2_cpu = torch.randn(num_batches, N, O, dtype=torch.float32)
                res_cpu = torch.randn(M, O, dtype=torch.float32)
                b1_dpcpp = b1_cpu.to(device=device)
                b2_dpcpp = b2_cpu.to(device=device)
                res_dpcpp = res_cpu.to(device=device)

                addbmm_cpu = torch.addbmm(res_cpu,
                                          b1_cpu,
                                          b2_cpu,
                                          beta=beta,
                                          alpha=alpha)
                addbmm_dpcpp = torch.addbmm(res_dpcpp,
                                            b1_dpcpp,
                                            b2_dpcpp,
                                            beta=beta,
                                            alpha=alpha)
                self.assertEqual(addbmm_cpu, addbmm_dpcpp)
                y_cpu = torch.randn(M, O, dtype=torch.float32)
                y_dpcpp = y_cpu.to(device=device)
                torch.addbmm(res_cpu,
                             b1_cpu,
                             b2_cpu,
                             beta=beta,
                             alpha=alpha,
                             out=y_cpu)
                torch.addbmm(res_dpcpp,
                             b1_dpcpp,
                             b2_dpcpp,
                             beta=beta,
                             alpha=alpha,
                             out=y_dpcpp)
                self.assertEqual(y_cpu, y_dpcpp)
Example #13
 def forward(self, input):
     if self.bias is None:
         return torch.addbmm(0, self.zero_bias, 1, input, self.weight)
     else:
         return torch.addbmm(self.bias, input, self.weight)
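# Sketch of a modern equivalent for the no-bias branch above (shapes are
# assumed for illustration): the legacy call addbmm(0, zero_bias, 1, input,
# weight) zeroes out the matrix term, which today is spelled beta=0.
import torch

inp = torch.randn(10, 3, 4)        # illustrative batched input
weight = torch.randn(10, 4, 5)     # illustrative batched weight
zero_bias = torch.zeros(3, 5)
out = torch.addbmm(zero_bias, inp, weight, beta=0)  # sum_b inp[b] @ weight[b]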
Example #14
import torch
import numpy as np

if __name__ == '__main__':
    M = torch.randn(3, 5)
    batch1 = torch.randn(10, 3, 4)
    batch2 = torch.randn(10, 4, 5)
    print(torch.addbmm(M, batch1,
                       batch2))  # beta * M + alpha * sum_b batch1[b] @ batch2[b]

    M_batch = torch.randn(10, 3, 5)
    print(torch.baddbmm(M_batch, batch1,
                        batch2).shape)  # torch.Size([10, 3, 5])
    print(torch.bmm(batch1, batch2).shape)  # batch1 * batch2
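
    # Quick checks of the semantics above, using the tensors already defined
    # in this script: addbmm reduces over the batch dimension, baddbmm keeps it.
    assert torch.addbmm(M, batch1, batch2).shape == (3, 5)
    assert torch.allclose(torch.addbmm(M, batch1, batch2),
                          M + (batch1 @ batch2).sum(dim=0), atol=1e-5)
    assert torch.baddbmm(M_batch, batch1, batch2).shape == (10, 3, 5)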
Example #15
 def forward(self):
     return torch.addbmm(self.input_one, self.batch1, self.batch2)
Example #16
import torch

# Outer product of 2 vectors (v1 and v2 are defined here so the snippet runs;
# torch.ger is the older name for torch.outer)
v1 = torch.arange(1., 4.)  # Size 3
v2 = torch.arange(1., 4.)  # Size 3
r = torch.ger(v1, v2)      # Size 3x3

# Add M with outer product of 2 vectors
# Size 3x2
vec1 = torch.arange(1., 4.)  # Size 3 (float, to match M's dtype)
vec2 = torch.arange(1., 3.)  # Size 2
M = torch.zeros(3, 2)
r = torch.addr(M, vec1, vec2)

# Batch Matrix x Matrix
# Size 10x3x5
batch1 = torch.randn(10, 3, 4)
batch2 = torch.randn(10, 4, 5)
r = torch.bmm(batch1, batch2)

# Matrix + sum of batch matrix-matrix products
# addbmm performs the batch product, then sums it over the batch dimension
# 3x2 + sum_b(5x3x4 @ 5x4x2) -> 3x2
M = torch.randn(3, 2)
batch1 = torch.randn(5, 3, 4)
batch2 = torch.randn(5, 4, 2)
r = torch.addbmm(M, batch1, batch2)

# Move tensors to GPU
if torch.cuda.is_available():
    x, y = torch.randn(3), torch.randn(3)  # placeholders so the snippet runs
    x = x.cuda()
    y = y.cuda()
    x + y

print(r)
 def _update(self, x, y):
     x = x.unsqueeze(1)  # (batch, 1, in_features)
     y = y.unsqueeze(2)  # (batch, out_features, 1)
     # Sum of outer products over the batch: update = sum_b y[b] @ x[b]
     update = torch.addbmm(
         torch.autograd.Variable(torch.Tensor([0]).cuda()), y, x)
     self.permanence.data = (
         self.permanence.data +
         self.lr * update.data * self.module.weight_raw.data).clamp_(-1, 1)
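
# Sketch of the same update in current PyTorch (sizes are illustrative; the
# Variable wrapper above is the pre-0.4 API and is no longer needed). The
# one-element zero tensor broadcasts against the (out, in) result:
import torch

x = torch.randn(8, 5).unsqueeze(1)  # (batch, 1, in)
y = torch.randn(8, 3).unsqueeze(2)  # (batch, out, 1)
update = torch.addbmm(torch.zeros(1), y, x)  # (out, in) sum of outer products
assert torch.allclose(update, torch.einsum('boi->oi', y @ x), atol=1e-5)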