def _perform_sgemv(self, mat, v, vec_out, nvecs, dim):
    '''
    NOTES: cuBLAS assumes Fortran (column-major) layout.
    cublasSgemv multiplies a matrix by a vector (Level-2 BLAS):
        cublas_handle   -> handle to the cuBLAS library context
        't'             -> transpose the matrix
        dim             -> number of columns of the matrix
        nvecs           -> number of rows of the matrix
        alpha           -> scalar used for multiplication of mat
        mat.gpudata     -> matrix mat
        dim             -> leading dimension of the matrix
        v.gpudata       -> vector v
        incx            -> stride within x; e.g. if incx is 7, every 7th element is used
        beta            -> scalar used for multiplication of vec_out
        vec_out.gpudata -> result vector
        incy            -> stride within y; e.g. if incy is 7, every 7th element is used
    Read more: http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemv
    '''
    alpha = np.float32(1.0)
    beta = np.float32(0.0)
    incx = 1
    incy = 1
    cublas_handle = cublas.cublasCreate()
    cublas.cublasSgemv(cublas_handle, 't', dim, nvecs, alpha,
                       mat.gpudata, dim, v.gpudata, incx,
                       beta, vec_out.gpudata, incy)
    cublas.cublasDestroy(cublas_handle)
    return vec_out
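# A minimal standalone sketch (not part of the original class) of the same
# call pattern, assuming the usual imports; the variable names below are
# illustrative. A C-ordered (nvecs, dim) array is, byte for byte, a
# column-major (dim, nvecs) matrix, so passing 't' with lda=dim makes
# cuBLAS compute mat @ v.
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from skcuda import cublas

nvecs, dim = 4, 3
mat = np.random.rand(nvecs, dim).astype(np.float32)
v = np.random.rand(dim).astype(np.float32)
mat_gpu = gpuarray.to_gpu(mat)              # C order on the device
v_gpu = gpuarray.to_gpu(v)
out_gpu = gpuarray.empty((nvecs,), np.float32)

handle = cublas.cublasCreate()
cublas.cublasSgemv(handle, 't', dim, nvecs, np.float32(1.0),
                   mat_gpu.gpudata, dim, v_gpu.gpudata, 1,
                   np.float32(0.0), out_gpu.gpudata, 1)
cublas.cublasDestroy(handle)
assert np.allclose(out_gpu.get(), mat.dot(v))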
def test_cublasSgemv(self):
    a = np.random.rand(2, 3).astype(np.float32)
    x = np.random.rand(3, 1).astype(np.float32)
    a_gpu = gpuarray.to_gpu(a.T.copy())
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.empty((2, 1), np.float32)
    alpha = np.float32(1.0)
    beta = np.float32(0.0)
    cublas.cublasSgemv(self.cublas_handle, 'n', 2, 3, alpha,
                       a_gpu.gpudata, 2, x_gpu.gpudata, 1,
                       beta, y_gpu.gpudata, 1)
    assert np.allclose(y_gpu.get(), np.dot(a, x))
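# A hedged companion sketch (not in the original excerpt): the same check in
# double precision via cublasDgemv, assuming the same self.cublas_handle
# fixture and imports as test_cublasSgemv above.
def test_cublasDgemv(self):
    a = np.random.rand(2, 3).astype(np.float64)
    x = np.random.rand(3, 1).astype(np.float64)
    a_gpu = gpuarray.to_gpu(a.T.copy())
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.empty((2, 1), np.float64)
    alpha = np.float64(1.0)
    beta = np.float64(0.0)
    cublas.cublasDgemv(self.cublas_handle, 'n', 2, 3, alpha,
                       a_gpu.gpudata, 2, x_gpu.gpudata, 1,
                       beta, y_gpu.gpudata, 1)
    assert np.allclose(y_gpu.get(), np.dot(a, x))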
def product_Ax(A_d, X, N, handler, precision):
    # A_d must hold A in C (row-major) order: the "t" below makes cuBLAS,
    # which assumes column-major storage, read it as the untransposed A.
    if precision == 1:
        RP = np.float32
    else:
        RP = np.float64
    X_d = ga.to_gpu(X.reshape((N, 1)).astype(RP))
    res_d = ga.to_gpu(np.zeros((N, 1)).astype(RP))
    alpha = 1
    if precision == 1:
        cublas.cublasSgemv(handler, "t", N, N, alpha,
                           A_d.gpudata, N, X_d.gpudata, 1,
                           0, res_d.gpudata, 1)
    else:
        cublas.cublasDgemv(handler, "t", N, N, alpha,
                           A_d.gpudata, N, X_d.gpudata, 1,
                           0, res_d.gpudata, 1)
    result = res_d.get()
    return result[:, 0]
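# A hypothetical usage sketch for product_Ax; the setup below is assumed and
# not part of the original. A is uploaded in C (row-major) order, so the "t"
# inside product_Ax makes cuBLAS read it as the untransposed A, and the call
# computes A @ X.
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as ga
from skcuda import cublas

N = 4
A = np.random.rand(N, N)
X = np.random.rand(N)
A_d = ga.to_gpu(A.astype(np.float64))
handler = cublas.cublasCreate()
result = product_Ax(A_d, X, N, handler, precision=2)
cublas.cublasDestroy(handler)
assert np.allclose(result, A.dot(X))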
# trans refers to the structure of the matrix: we can specify whether we want
# to use the original matrix, a direct transpose, or a conjugate transpose
# (for complex matrices). Since we now have the column-wise matrix stored
# properly on the GPU, we can set the trans variable to not take the
# transpose by using the _CUBLAS_OP dictionary.
trans = cublas._CUBLAS_OP['N']

# lda indicates the leading dimension of the matrix; the total size of the
# matrix in memory is actually lda x n (if lda < m, problems arise).
lda = m

# x and its stride, incx: x is the underlying C pointer of the vector being
# multiplied by A. Remember, x will have to be of size n.
incx = 1

# y and its stride, incy, are the last parameters. We should remember that y
# should be of size m, or the number of rows.
incy = 1

handle = cublas.cublasCreate()  # refers to the cuBLAS context

# (A, x, their gpuarrays, and m, n, alpha, beta are set up earlier.)
cublas.cublasSgemv(handle, trans, m, n, alpha, A_gpu.gpudata, lda,
                   x_gpu.gpudata, incx, beta, y_gpu.gpudata, incy)

cublas.cublasDestroy(handle)
print('cuBLAS returned the correct value: %s' % np.allclose(np.dot(A, x), y_gpu.get()))

"""Level-3 GEMM (general matrix-matrix)"""
# We use GEMM as a performance metric for our GPU to determine the number of
# floating point operations per second (FLOPS) it can perform, measured as
# two separate values: one for single precision and one for double precision.
# m, n, and k variables for our matrix sizes
m = 5000
n = 10000
k = 10000
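# A minimal sketch of the single-precision FLOPS measurement described above,
# assuming the same pycuda/skcuda imports as the earlier snippets; the
# host-side timing with time.time() plus a context synchronize is an
# assumption, not necessarily how the original measured it. A GEMM of sizes
# (m, k) x (k, n) performs roughly 2*m*n*k floating point operations.
import time
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from skcuda import cublas

m, n, k = 5000, 10000, 10000   # same sizes as above
# C-ordered (k, m), (n, k), (n, m) buffers are column-major (m, k), (k, n),
# and (m, n) matrices, which is exactly what cuBLAS expects for C = A * B.
A_gpu = gpuarray.to_gpu(np.random.rand(k, m).astype(np.float32))
B_gpu = gpuarray.to_gpu(np.random.rand(n, k).astype(np.float32))
C_gpu = gpuarray.zeros((n, m), np.float32)

handle = cublas.cublasCreate()
t0 = time.time()
cublas.cublasSgemm(handle, 'n', 'n', m, n, k, np.float32(1.0),
                   A_gpu.gpudata, m, B_gpu.gpudata, k,
                   np.float32(0.0), C_gpu.gpudata, m)
pycuda.autoinit.context.synchronize()  # wait for the GEMM to finish
elapsed = time.time() - t0
cublas.cublasDestroy(handle)

print('Single-precision performance: %.2f GFLOPS'
      % (2 * m * n * k / (elapsed * 1e9)))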
print("\nComputing matrix vector mutliplication in CPU:\n") print("a = \n", a_cpu, "\n") print("b = \n", b_cpu, "\n") print("Product =\n", c_cpu, "\n") # allocating and converting (a) to a gpuarray: a_gpu = gpuarray.to_gpu(a_cpu) # allocating and converting (b) to a gpuarray: b_gpu = gpuarray.to_gpu(b_cpu) # allocating c as a 2x1 matrix filled with zeros: c_gpu = gpuarray.zeros((2,1), dtype = np.float32) # Computing matrix product of gpu (a) and (b) and storing it in gpu (c): cublas.cublasSgemv(handle = cublas.cublasCreate(), trans = 'n', m = 2, n = 3, alpha = 1.0, A = a_gpu.gpudata, lda = 2, x = b_gpu.gpudata, incx = 1, beta = 1.0, y = c_gpu.gpudata, incy = 1) # printing the process(GPU): print("\nComputing matrix vector mutliplication in GPU:\n") print("a = \n", a_gpu.get(), "\n") print("b = \n", b_gpu.get(), "\n") print("Product =\n", c_gpu.get(), "\n") #print(a_gpu, "\n", b_gpu) #print(c_gpu)