Ejemplo n.º 1
0
    def test_cublasZgemmBatched(self):
        """Compare cublasZgemmBatched with a NumPy batched matrix product.

        Random complex128 stacks of shape (batch, rows, inner) and
        (batch, inner, cols) are multiplied on the host with ``np.einsum``
        and on the device with ``cublasZgemmBatched``; the two results
        must agree within ``np.allclose`` tolerances.
        """
        batch, rows, inner, cols = 11, 7, 5, 3

        # Host operands (real part drawn first, then imaginary part).
        host_a = (np.random.rand(batch, rows, inner)
                  + 1j*np.random.rand(batch, rows, inner)).astype(np.complex128)
        host_b = (np.random.rand(batch, inner, cols)
                  + 1j*np.random.rand(batch, inner, cols)).astype(np.complex128)

        # Reference result: one (rows x cols) product per batch entry.
        expected = np.einsum('nij,njk->nik', host_a, host_b)

        # Device copies of the operands plus an uninitialised output stack.
        dev_a = gpuarray.to_gpu(host_a)
        dev_b = gpuarray.to_gpu(host_b)
        dev_c = gpuarray.empty((batch, rows, cols), np.complex128)

        one = np.complex128(1.0)
        zero = np.complex128(0.0)

        # Pointer arrays handed to the batched routine (built by bptrs).
        a_ptrs = bptrs(dev_a)
        b_ptrs = bptrs(dev_b)
        c_ptrs = bptrs(dev_c)

        # NOTE: cuBLAS is documented as column-major, so the operands are
        # passed in swapped order (B first, then A) with dimensions
        # (cols, rows, inner); on the row-major buffers this yields C = A B.
        cublas.cublasZgemmBatched(
            self.cublas_handle, 'n', 'n',
            cols, rows, inner,
            one,
            b_ptrs.gpudata, cols,
            a_ptrs.gpudata, inner,
            zero,
            c_ptrs.gpudata, cols,
            batch,
        )

        assert np.allclose(expected, dev_c.get())
Ejemplo n.º 2
0
def eps_l_noop_batch(x_ptrs, A1_ptrs, A2_ptrs, out, tmp_ptrs, tmp2_ptrs, tmp2, handle):
    """Run two batched ZGEMMs and accumulate the batch results into ``out``.

    In cuBLAS terms, for every batch entry the first call evaluates
    ``tmp = x * A1^H`` and the second ``tmp2 = A2 * tmp`` (all operands
    treated as D x D with leading dimension D). ``out`` is then zeroed
    and every ``tmp2[s]`` is added to it with ``cublasZaxpy``.

    Parameters
    ----------
    x_ptrs, A1_ptrs, A2_ptrs, tmp_ptrs, tmp2_ptrs
        Objects whose ``gpudata`` is passed to ``cublasZgemmBatched`` as
        the per-batch pointer arrays. Presumably ``tmp2_ptrs`` refers to
        the buffers in ``tmp2`` -- confirm against the caller.
    out
        Output buffer; ``out.shape[0]`` supplies D. Overwritten in place.
    tmp2
        Sequence of device buffers; its length is the batch count.
    handle
        cuBLAS handle forwarded to every call.

    Returns
    -------
    The same ``out`` object that was passed in.
    """
    dim = out.shape[0]
    batch_count = len(tmp2)

    # Stage 1: tmp <- x * A1^H for each batch entry.
    cb.cublasZgemmBatched(
        handle, 'N', 'C',
        dim, dim, dim,
        1., x_ptrs.gpudata, dim,
        A1_ptrs.gpudata, dim,
        0., tmp_ptrs.gpudata, dim,
        batch_count,
    )

    # Stage 2: tmp2 <- A2 * tmp for each batch entry.
    cb.cublasZgemmBatched(
        handle, 'N', 'N',
        dim, dim, dim,
        1., A2_ptrs.gpudata, dim,
        tmp_ptrs.gpudata, dim,
        0., tmp2_ptrs.gpudata, dim,
        batch_count,
    )

    # Reduce over the batch: out <- sum_s tmp2[s].
    out.fill(0)
    idx = 0
    while idx < batch_count:
        cb.cublasZaxpy(handle, dim * dim, 1., tmp2[idx].gpudata, 1, out.gpudata, 1)
        idx += 1

    return out
Ejemplo n.º 3
0
def eps_l_noop_batch(x_ptrs, A1_ptrs, A2_ptrs, out, tmp_ptrs, tmp2_ptrs, tmp2,
                     handle):
    """Chain two batched complex GEMMs, then sum the batch into ``out``.

    Per batch entry (cuBLAS convention, D x D operands, leading dimension
    D): ``tmp = x * A1^H`` followed by ``tmp2 = A2 * tmp``. Afterwards
    ``out`` is cleared and each ``tmp2[s]`` is added to it via
    ``cublasZaxpy``.

    Args:
        x_ptrs, A1_ptrs, A2_ptrs: inputs whose ``gpudata`` is handed to
            ``cublasZgemmBatched`` as pointer arrays.
        out: result buffer; ``out.shape[0]`` gives D. Modified in place.
        tmp_ptrs, tmp2_ptrs: pointer arrays for the two intermediate
            stages (assumed to reference scratch buffers, with
            ``tmp2_ptrs`` matching ``tmp2`` -- TODO confirm with caller).
        tmp2: indexable collection of device buffers; ``len(tmp2)`` is
            used as the batch count.
        handle: cuBLAS handle.

    Returns:
        ``out``, the object passed in.
    """
    size = out.shape[0]
    n_batch = len(tmp2)

    # First product, 'C' on the second operand: tmp = x * A1^H.
    first_args = (handle, 'N', 'C', size, size, size,
                  1., x_ptrs.gpudata, size,
                  A1_ptrs.gpudata, size,
                  0., tmp_ptrs.gpudata, size, n_batch)
    cb.cublasZgemmBatched(*first_args)

    # Second product, no transposes: tmp2 = A2 * tmp.
    second_args = (handle, 'N', 'N', size, size, size,
                   1., A2_ptrs.gpudata, size,
                   tmp_ptrs.gpudata, size,
                   0., tmp2_ptrs.gpudata, size, n_batch)
    cb.cublasZgemmBatched(*second_args)

    # Start from zero and add every per-batch result.
    out.fill(0)
    for pos in range(n_batch):
        cb.cublasZaxpy(handle, size * size, 1., tmp2[pos].gpudata, 1,
                       out.gpudata, 1)

    return out
Ejemplo n.º 4
0
def eps_r_noop_batch(x_ptrs, A1_ptrs, A2_ptrs, out, tmp_ptrs, tmp2_ptrs, tmp2, handle):
    """Run two batched ZGEMMs and accumulate the batch results into ``out``.

    In cuBLAS terms, for every batch entry the first call evaluates
    ``tmp = x * A1`` and the second ``tmp2 = A2^H * tmp``. ``out`` is then
    zeroed and every ``tmp2[s]`` is added to it with ``cublasZaxpy``.

    Parameters
    ----------
    x_ptrs, A1_ptrs, A2_ptrs, tmp_ptrs, tmp2_ptrs
        Objects whose ``gpudata`` is passed to ``cublasZgemmBatched`` as
        the per-batch pointer arrays. Presumably ``tmp2_ptrs`` refers to
        the buffers in ``tmp2`` -- confirm against the caller.
    out
        Output buffer; ``out.shape[0]`` supplies D. Overwritten in place.
    tmp2
        Sequence of device buffers; its length is the batch count.
    handle
        cuBLAS handle forwarded to every call.

    Returns
    -------
    The same ``out`` object that was passed in.
    """
    D = out.shape[0]
    # Dm1 is currently the same value as D; the separate name is kept so
    # the two dimension roles in the GEMM calls stay distinguishable
    # (presumably "D of the previous site" -- verify against the caller).
    Dm1 = D
    d = len(tmp2)

    # Stage 1: tmp <- x * A1 for each batch entry.
    cb.cublasZgemmBatched(
        handle, "N", "N", D, Dm1, D, 1.0, x_ptrs.gpudata, D, A1_ptrs.gpudata, D, 0.0, tmp_ptrs.gpudata, D, d
    )
    # Stage 2: tmp2 <- A2^H * tmp for each batch entry.
    cb.cublasZgemmBatched(
        handle, "C", "N", Dm1, Dm1, D, 1.0, A2_ptrs.gpudata, D, tmp_ptrs.gpudata, D, 0.0, tmp2_ptrs.gpudata, Dm1, d
    )

    # Reduce over the batch: out <- sum_s tmp2[s].
    out.fill(0)
    n_elems = Dm1 * Dm1  # loop-invariant element count of each buffer
    # `range` instead of `xrange`: the latter does not exist on Python 3
    # and would raise NameError; `range` iterates identically on both.
    for s in range(d):
        cb.cublasZaxpy(handle, n_elems, 1.0, tmp2[s].gpudata, 1, out.gpudata, 1)

    return out