def test_cublasSgemv(self):
     """Check cublasSgemv against np.dot for a random 2x3 float32 matrix."""
     a = np.random.rand(2, 3).astype(np.float32)
     x = np.random.rand(3, 1).astype(np.float32)
     # Upload a C-ordered copy of the transpose so the device buffer holds
     # `a` column by column, matching the leading dimension of 2 used below.
     a_gpu = gpuarray.to_gpu(a.T.copy())
     x_gpu = gpuarray.to_gpu(x)
     y_gpu = gpuarray.empty((2, 1), np.float32)
     alpha = np.float32(1.0)
     beta = np.float32(0.0)
     # The handle-based CUBLAS wrappers take the context handle as their
     # first argument; without it the 'n' flag lands in the handle slot.
     cublas.cublasSgemv(self.cublas_handle, 'n', 2, 3, alpha, a_gpu.gpudata,
                        2, x_gpu.gpudata, 1, beta, y_gpu.gpudata, 1)
     assert np.allclose(y_gpu.get(), np.dot(a, x))
 def test_cublasSgemv(self):
     """Compare a cublasSgemv product with np.dot on random float32 data."""
     n_rows, n_cols = 2, 3
     a = np.random.rand(n_rows, n_cols).astype(np.float32)
     x = np.random.rand(n_cols, 1).astype(np.float32)
     expected = np.dot(a, x)

     # A C-ordered copy of a.T lays `a` out column by column on the device,
     # which is why the leading dimension passed below is n_rows.
     dev_matrix = gpuarray.to_gpu(a.T.copy())
     dev_vector = gpuarray.to_gpu(x)
     dev_result = gpuarray.empty((n_rows, 1), np.float32)

     one = np.float32(1.0)
     zero = np.float32(0.0)
     cublas.cublasSgemv(
         self.cublas_handle, 'n', n_rows, n_cols,
         one, dev_matrix.gpudata, n_rows,
         dev_vector.gpudata, 1,
         zero, dev_result.gpudata, 1)

     assert np.allclose(dev_result.get(), expected)
def to_unit_variance(H):
    ''' Scales H so that each column has a variance of 1.

    Two cublasSgemv products against a vector of ones, each weighted by
    1 / H.shape[0], reduce first H and then a same-shaped scratch buffer
    to one value per column (assuming H is stored row-major). The
    _unitvariance_step* kernels do the element-wise work in between.

    NOTE(review): H is presumably a 2-D float32 GPU array that the
    kernels modify in place -- confirm against the kernel definitions.

    Returns the same H object that was passed in.
    '''
    from scikits.cuda.misc import _global_cublas_handle as cublas_handle

    n_rows = H.shape[0]
    n_cols = H.shape[1]

    ones_vec = gpu.empty((n_rows, 1), np.float32,
                         allocator=_mempool.allocate)
    ones_vec.fill(1.0)
    scratch = gpu.empty(H.shape, np.float32, allocator=_mempool.allocate)
    # One buffer serves both reductions: it receives the averages of H
    # first and is then overwritten with the averages of `scratch`.
    col_stats = gpu.empty((1, n_cols), np.float32,
                          allocator=_mempool.allocate)

    inv_rows = 1.0 / n_rows

    cublasSgemv(cublas_handle, "n", n_cols, n_rows, inv_rows,
                H.gpudata, n_cols, ones_vec.gpudata, 1,
                0.0, col_stats.gpudata, 1)
    _unitvariance_step1_kernel(H, col_stats, scratch, n_cols)

    cublasSgemv(cublas_handle, "n", n_cols, n_rows, inv_rows,
                scratch.gpudata, n_cols, ones_vec.gpudata, 1,
                0.0, col_stats.gpudata, 1)
    _unitvariance_step2_kernel(col_stats, n_cols)
    _unitvariance_step3_kernel(H, col_stats, n_cols)

    return H
def to_unit_variance(H):
    ''' Scales H so that each column has a variance of 1.

    The work is a gemv / kernel / gemv / kernel / kernel sequence: each
    cublasSgemv multiplies against a vector of ones with a weight of
    1 / H.shape[0], producing one averaged value per column of its input
    (assuming row-major storage), and the _unitvariance_step* kernels
    consume those values.

    NOTE(review): presumably H is a 2-D float32 GPU array updated in
    place by the kernels -- verify against their definitions.

    Returns the H object it was given.
    '''
    from scikits.cuda.misc import _global_cublas_handle as cublas_handle

    height = H.shape[0]
    width = H.shape[1]

    def _pooled_empty(shape):
        # All temporaries are float32 and come from the module's memory pool.
        return gpu.empty(shape, np.float32, allocator=_mempool.allocate)

    def _average_rows(src, dst):
        # dst <- (1 / height) * src-as-(width x height) * all_ones
        cublasSgemv(cublas_handle, "n", width, height,
                    1.0 / height, src.gpudata, width,
                    all_ones.gpudata, 1,
                    0.0, dst.gpudata, 1)

    all_ones = _pooled_empty((height, 1))
    all_ones.fill(1.0)
    squared = _pooled_empty(H.shape)
    per_column = _pooled_empty((1, width))

    _average_rows(H, per_column)
    _unitvariance_step1_kernel(H, per_column, squared, width)
    # per_column is reused: it now receives the averages of `squared`.
    _average_rows(squared, per_column)
    _unitvariance_step2_kernel(per_column, width)
    _unitvariance_step3_kernel(H, per_column, width)
    return H