# Example 1
0
import cudapy as cp

from time import time

# rangeId : void (float* A)
def rangeId(A):
  # Stores each in-range position's own index into A, converted to float.
  # NOTE(review) idx is not defined in this file; presumably it is the
  # per-thread index that cudapy supplies at launch - confirm.
  if idx >= len(A):
    return  # positions past the end of A have nothing to write
  A[idx] = float(idx)

# Compile the rangeId kernel with cudapy and keep the compiled callable.
__rangeIdCall = cp.compile(rangeId)

# matrixMultiply : void (float* A, float* B, float* C, int m, int n, int p)
# A is m x n
# B is n x p
# C is m x p
def matrixMultiply(A, B, C, m, n, p):
  # Computes one element of C = A * B, where A is m x n, B is n x p and
  # C is m x p, all stored as flat row-major arrays.
  # NOTE(review) idx and idy are not defined in this file; presumably they
  # are the per-thread x and y indices supplied by cudapy - confirm.
  r = idy
  c = idx

  # Positions outside the m x p output have no element to compute.
  if r >= m or c >= p:
    return

  aRow = r * n  # offset of row r inside the flat A
  acc = 0.0
  for k in xrange(n):
    acc += A[aRow + k] * B[k * p + c]

  C[r * p + c] = acc

# Compile the matrixMultiply kernel with cudapy and keep the compiled callable.
__matrixMultiplyCall = cp.compile(matrixMultiply)

# NOTE(review): presumably the row count passed as the kernel's `m`; the code
# that uses it is not visible in this snippet - confirm.
m = 1200
# Example 2
0

# mandelbrot : void (float, float, float, float, int, int, int, int*)
def mandelbrot(x0, y0, x1, y1, width, height, maxIter, output):
    # Maps the flat position idx onto a width x height grid spanning
    # x0..x1 and y0..y1, then stores mandel(x, y, maxIter) in output[idx].
    # NOTE(review) idx and mandel are not defined in this snippet; idx is
    # presumably the per-thread index supplied by cudapy - confirm.
    if idx < width * height:
        stepX = (x1 - x0) / float(width)
        stepY = (y1 - y0) / float(height)

        # Column and row of this position inside the grid.
        col = float(idx % width)
        row = float(idx / width)

        px = x0 + col * stepX
        py = y0 + row * stepY
        output[idx] = mandel(px, py, maxIter)


# Compile mandelbrot together with mandel (both are passed to cp.compile in one
# list) and keep the compiled callable.
__mandelbrotCall = cp.compile([mandelbrot, mandel])


def mandelbrotCall(x0, y0, x1, y1, width, height, maxIter):
    """Run the compiled mandelbrot kernel and return its output on the host.

    Allocates a ``cp.Int`` CudaArray with ``width * height`` entries, launches
    the kernel with ``cp.dim3(width * height)`` and returns the array's
    ``toHost()`` result.
    """
    pixelCount = width * height
    deviceOut = cp.CudaArray.allocate(pixelCount, cp.Int)
    launch = __mandelbrotCall(cp.dim3(pixelCount))
    launch(x0, y0, x1, y1, width, height, maxIter, deviceOut)
    return deviceOut.toHost()


def scaleAndShift(x0, y0, x1, y1, scale, shiftX, shiftY):
    x0 *= scale
    x1 *= scale
    y0 *= scale
    y1 *= scale
    x0 += shiftX
    x1 += shiftX
# Example 3
0
import cudapy as cp

# saxpy : void (float alpha, float* X, float* Y)
def saxpy(alpha, X, Y):
  # Updates Y in place with alpha * X + Y at this position.
  # NOTE(review) idx is not defined in this file; presumably it is the
  # per-thread index that cudapy supplies at launch - confirm.
  if idx >= len(X):
    return  # positions past the end of X are left untouched
  scaled = alpha * X[idx]
  Y[idx] = scaled + Y[idx]

# Compile the kernel function
saxpyCall = cp.compile(saxpy)

# Build the host inputs as real lists. A bare map() is an iterator on
# Python 3, so len(X) below would raise TypeError there; the comprehension
# yields the same list on Python 2.
X = [float(i) for i in range(100)]
Y = [float(i) for i in range(100)]

# Transfer Y to device memory
Y = cp.CudaArray(Y)
# Make the SAXPY call
saxpyCall(len(X))(5.0, X, Y)
# Convert the result back to Python list
result = Y.toList()

# Parenthesised single-argument print emits the same text on Python 2 and is
# valid syntax on Python 3.
print(result)