Example #1
 def test_cublasSdot(self):
     # Generate two random single-precision vectors and copy them to the GPU.
     x = np.random.rand(5).astype(np.float32)
     x_gpu = gpuarray.to_gpu(x)
     y = np.random.rand(5).astype(np.float32)
     y_gpu = gpuarray.to_gpu(y)
     # cublasSdot(handle, n, x, incx, y, incy) computes the single-precision
     # dot product; both strides (incx, incy) are 1 here.
     result = cublas.cublasSdot(self.cublas_handle, x_gpu.size, x_gpu.gpudata, 1,
                                y_gpu.gpudata, 1)
     # The result should match NumPy's dot product on the host arrays.
     assert np.allclose(result, np.dot(x, y))
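For reference, here is a minimal standalone version of the same call; it is a
sketch assuming scikit-cuda and PyCUDA are installed, and it creates its own
handle rather than relying on the test fixture's self.cublas_handle:

import numpy as np
import pycuda.autoinit  # noqa: F401 (initializes the CUDA context)
import pycuda.gpuarray as gpuarray
from skcuda import cublas

handle = cublas.cublasCreate()

x = np.random.rand(5).astype(np.float32)
y = np.random.rand(5).astype(np.float32)
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(y)

# Single-precision dot product with unit strides on both vectors.
result = cublas.cublasSdot(handle, x_gpu.size, x_gpu.gpudata, 1,
                           y_gpu.gpudata, 1)
assert np.allclose(result, np.dot(x, y))

cublas.cublasDestroy(handle)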
Example #2
 def forward(self, vector1, vector2):
     with torch.cuda.device_of(vector1):
         output = vector1.new(1)
         handle = torch.cuda.current_blas_handle()
         # skcuda expects the raw cudaStream_t pointer, not the Stream
         # object itself, so pass stream.cuda_stream.
         stream = torch.cuda.current_stream()
         cublas.cublasSetStream(handle, stream.cuda_stream)
         # Dispatch on the tensor's precision: Sdot for float32,
         # Ddot for float64.
         if isinstance(vector1, torch.cuda.FloatTensor):
             result = cublas.cublasSdot(handle, vector1.numel(),
                                        vector1.data_ptr(), 1,
                                        vector2.data_ptr(), 1)
         elif isinstance(vector1, torch.cuda.DoubleTensor):
             result = cublas.cublasDdot(handle, vector1.numel(),
                                        vector1.data_ptr(), 1,
                                        vector2.data_ptr(), 1)
         else:
             raise TypeError('unsupported tensor type: %s' % type(vector1))
         output = output.fill_(float(result))
     self.save_for_backward(vector1, vector2)
     return output
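A minimal usage sketch, assuming this forward lives in an old-style
torch.autograd.Function subclass (the instance-method API implied by
self.save_for_backward); the class name Dot is hypothetical:

import torch

v1 = torch.randn(10).cuda()  # torch.cuda.FloatTensor
v2 = torch.randn(10).cuda()

out = Dot()(v1, v2)  # 1-element CUDA tensor holding the dot product
print(out)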
Example #3
# (In contrast, if you were using a vector taken from a column of a row-major
# matrix, you would set the stride to the number of columns in the matrix; see
# the column-stride sketch after the saxpy check below.)
# We then pass in the pointer to the y_gpu array, and set its stride to 1 as well.

# We can now use the cublasSaxpy function. The S stands for single precision,
# which is what we need since we are working with 32-bit floating-point arrays:
cublas.cublasSaxpy(cublas_context_h, x_gpu.size, a, x_gpu.gpudata, 1,
                   y_gpu.gpudata, 1)

print(y_gpu.get())
print('This is close to the NumPy approximation: %s' % np.allclose(
    a * x + y, y_gpu.get()))
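
# As an aside, here is a sketch of the column-stride case mentioned above
# (illustrative, not part of the original walkthrough): to dot column j of a
# row-major matrix with a vector, start at an offset of j elements and step
# with a stride equal to the number of columns.
M_rows, M_cols = 4, 3
A = np.random.rand(M_rows, M_cols).astype(np.float32)
u = np.random.rand(M_rows).astype(np.float32)
A_gpu = gpuarray.to_gpu(A)
u_gpu = gpuarray.to_gpu(u)
j = 1  # column index
col_ptr = int(A_gpu.gpudata) + j * A.dtype.itemsize  # pointer to A[0, j]
col_dot = cublas.cublasSdot(cublas_context_h, M_rows, col_ptr, M_cols,
                            u_gpu.gpudata, 1)
print(np.allclose(col_dot, np.dot(A[:, j], u)))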

# Copy fresh copies of x and y to the GPU for the dot-product and norm
# examples (the saxpy call above modified y_gpu in place).
w_gpu = gpuarray.to_gpu(x)
v_gpu = gpuarray.to_gpu(y)

# Perform a dot product between v and w (both single precision):
dot_output = cublas.cublasSdot(cublas_context_h, v_gpu.size, v_gpu.gpudata, 1,
                               w_gpu.gpudata, 1)

print(dot_output)
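
# Sanity check: this should equal np.dot(y, x) computed on the host.
print(np.allclose(dot_output, np.dot(y, x)))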

# Compute the L2 (Euclidean) norm of v with cublasSnrm2:
l2_output = cublas.cublasSnrm2(cublas_context_h, v_gpu.size, v_gpu.gpudata, 1)

print(l2_output)
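
# This should match the Euclidean norm of y computed on the host.
print(np.allclose(l2_output, np.linalg.norm(y)))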

# Destroy the cuBLAS context now that we are done with it.
cublas.cublasDestroy(cublas_context_h)

# (If we want to operate on arrays of 64-bit real floating-point values
# (float64 in NumPy and PyCUDA), then we would use cublasDaxpy instead, as
# sketched below.)
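
# A brief double-precision sketch (illustrative, not in the original
# walkthrough). The handle above has already been destroyed, so we create a
# fresh one here.
d_handle = cublas.cublasCreate()
a_d = np.float64(10)
x_d = np.random.rand(5).astype(np.float64)
y_d = np.random.rand(5).astype(np.float64)
x_d_gpu = gpuarray.to_gpu(x_d)
y_d_gpu = gpuarray.to_gpu(y_d)
# cublasDaxpy computes y <- a*x + y in double precision.
cublas.cublasDaxpy(d_handle, x_d_gpu.size, a_d, x_d_gpu.gpudata, 1,
                   y_d_gpu.gpudata, 1)
print(np.allclose(a_d * x_d + y_d, y_d_gpu.get()))
cublas.cublasDestroy(d_handle)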
"""Level-2 GEMV (general matrix-vector)"""

# m and n are the number of rows and columns of the matrix, respectively
m = 10
n = 100
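
# A sketch of how the GEMV call might continue (an illustrative completion,
# not the original text). cuBLAS assumes column-major (Fortran-style) storage
# while NumPy arrays are row-major, so we copy the transpose of A to the GPU
# and use lda = m.
A = np.random.rand(m, n).astype(np.float32)
x = np.random.rand(n).astype(np.float32)
y = np.zeros(m).astype(np.float32)

A_gpu = gpuarray.to_gpu(A.T.copy())  # column-major copy of A
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(y)

handle = cublas.cublasCreate()
# y <- alpha * (A @ x) + beta * y, with alpha = 1 and beta = 0.
cublas.cublasSgemv(handle, 'N', m, n, 1.0,
                   A_gpu.gpudata, m, x_gpu.gpudata, 1,
                   0.0, y_gpu.gpudata, 1)
cublas.cublasDestroy(handle)

print(np.allclose(np.dot(A, x), y_gpu.get()))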