Example #1
0
    def kernel(callDim, y=1, z=1):
        if not isinstance(callDim, dim3):
            callDim = dim3(callDim, y, z)
        blockDim = allocateThreads(threadSize, callDim)
        gridDim = getGridDim(callDim, blockDim)

        def coerceArgs(*args):
            args = list(args)
            if len(args) != len(sig):
                raise CudaPyError(funName + " takes " + str(len(sig)) + " arguments.")

            temps = []  # Prevent premature garbage collection
            for i in xrange(len(sig)):
                if isinstance(sig[i], Pointer):
                    if isinstance(args[i], list):
                        temps.append(CudaArray(args[i]))
                        args[i] = temps[-1]
                    assert isinstance(args[i], BaseCudaArray), "expected CudaArray found " + type(args[i]).__name__
                    assert args[i].elemType() == sig[i].elemType(), "argument types do not match"
                    args[i] = args[i].pointer()

            args = [gridDim, blockDim] + args
            fun(*args)

        return coerceArgs
Example #2
0
def getGridDim(callDim, blockDim):
    def divideUp(n, d):
        return (n + d - 1) // d

    x = divideUp(callDim.x, blockDim.x)
    y = divideUp(callDim.y, blockDim.y)
    z = divideUp(callDim.z, blockDim.z)
    return dim3(x, y, z)
Example #3
0
def allocateThreads(threads, dim):
    def power_two(n):
        return 1 << (n.bit_length() - 1)

    tx = min(threads, power_two(dim.x))
    threads //= tx
    ty = min(threads, power_two(dim.y))
    threads //= ty
    tz = min(threads, power_two(dim.z))
    threads //= tz

    return dim3(tx, ty, tz)