Example #1
0
def malloc_gpu_arrays(nx, ny, nz, cex, cey, cez):
    print 'rank= %d, (%d, %d, %d)' % (rank, nx, ny, nz),
    total_bytes = nx * ny * nz * 4 * 9
    if total_bytes / (1024**3) == 0:
        print '%d MB' % (total_bytes / (1024**2))
    else:
        print '%1.2f GB' % (float(total_bytes) / (1024**3))

    if nz % Dx != 0:
        print "Error: nz is not multiple of %d" % (Dx)
        sys.exit()
    if (nx * ny) % Dy != 0:
        print "Error: nx*ny is not multiple of %d" % (Dy)
        sys.exit()

    f = np.zeros((nx, ny, nz), 'f')
    ex_gpu = cuda.to_device(f)
    ey_gpu = cuda.to_device(f)
    ez_gpu = cuda.to_device(f)
    hx_gpu = cuda.to_device(f)
    hy_gpu = cuda.to_device(f)
    hz_gpu = cuda.to_device(f)

    descr = cuda.ArrayDescriptor3D()
    descr.width = nz
    descr.height = ny
    descr.depth = nx
    descr.format = cuda.dtype_to_array_format(f.dtype)
    descr.num_channels = 1
    descr.flags = 0
    tcex_gpu = cuda.Array(descr)
    tcey_gpu = cuda.Array(descr)
    tcez_gpu = cuda.Array(descr)

    mcopy = cuda.Memcpy3D()
    mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
    mcopy.src_height = mcopy.height = ny
    mcopy.depth = nx

    mcopy.set_src_host(cex)
    mcopy.set_dst_array(tcex_gpu)
    mcopy()
    mcopy.set_src_host(cey)
    mcopy.set_dst_array(tcey_gpu)
    mcopy()
    mcopy.set_src_host(cez)
    mcopy.set_dst_array(tcez_gpu)
    mcopy()

    eh_fields = [ex_gpu, ey_gpu, ez_gpu, hx_gpu, hy_gpu, hz_gpu]
    tex_fields = [tcex_gpu, tcey_gpu, tcez_gpu]

    return eh_fields, tex_fields
Example #2
0
def create_3d_texture(a, module, variable, point_sampling=False):
    
    a = numpy.asfortranarray(a)
    w, h, d = a.shape
    
    descr = cuda.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = cuda.dtype_to_array_format(a.dtype)
    descr.num_channels = 1
    descr.flags = 0
    ary = cuda.Array(descr)
   
    copy = cuda.Memcpy3D()
    copy.set_src_host(a)
    copy.set_dst_array(ary)
    copy.width_in_bytes = copy.src_pitch = a.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d
    copy()

    out_texref = module.get_texref(variable)
    out_texref.set_array(ary)
    if point_sampling: 
        out_texref.set_filter_mode(cuda.filter_mode.POINT)
    else: 
        out_texref.set_filter_mode(cuda.filter_mode.LINEAR)
    return out_texref
Example #3
0
def to_tex3d(data):
    """
    Source: https://wiki.tiker.net/PyCUDA/Examples/Demo3DSurface
    """
    d, h, w = data.shape
    descr = cuda.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = cuda.dtype_to_array_format(data.dtype)
    descr.num_channels = 1
    descr.flags = 0

    if isinstance(data, gpuarray.GPUArray):
        data = data.get()

    device_array = cuda.Array(descr)
    copy = cuda.Memcpy3D()
    copy.set_src_host(data)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = data.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d
    copy()

    return device_array
Example #4
0
def gpuArray3DtocudaArray(gpuArray, allowSurfaceBind=False, precision='float'):
    #import pycuda.autoinit
    d, h, w = gpuArray.shape
    descr3D = cuda.ArrayDescriptor3D()
    descr3D.width = w
    descr3D.height = h
    descr3D.depth = d
    if precision == 'float':
        descr3D.format = cuda.dtype_to_array_format(gpuArray.dtype)
        descr3D.num_channels = 1
    elif precision == 'double':
        descr3D.format = cuda.array_format.SIGNED_INT32
        descr3D.num_channels = 2
    else:
        print("ERROR:  CUDA_ARRAY incompatible precision")
        sys.exit()
    descr3D.flags = 0
    if allowSurfaceBind:
        descr3D.flags = cuda.array3d_flags.SURFACE_LDST
    cudaArray = cuda.Array(descr3D)
    copy3D = cuda.Memcpy3D()
    copy3D.set_src_device(gpuArray.ptr)
    copy3D.set_dst_array(cudaArray)
    copy3D.width_in_bytes = copy3D.src_pitch = gpuArray.strides[1]
    copy3D.src_height = copy3D.height = h
    copy3D.depth = d
    copy3D()
    return cudaArray, copy3D
def numpy3d_to_array(np_array, allow_surface_bind=False, layered=True):
    d, h, w = np_array.shape

    descr = cuda.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = cuda.dtype_to_array_format(np_array.dtype)
    descr.num_channels = 1
    descr.flags = 0

    if allow_surface_bind:
        descr.flags = cuda.array3d_flags.SURFACE_LDST

    if layered:
        descr.flags = cuda.array3d_flags.ARRAY3D_LAYERED

    device_array = cuda.Array(descr)

    copy = cuda.Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d

    copy()

    return device_array
Example #6
0
def numpy3d_to_array(np_array):
    '''Copy a 3D (d,h,w) numpy array into a 3D pycuda array that can be used 
    to set a texture.  (For some reason, gpuarrays can't be used to do that 
    directly).  A transpose happens implicitly; the CUDA array has dim (w,h,d).'''
    import pycuda.autoinit
    d, h, w = np_array.shape
    descr = driver.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    if np_array.dtype == np.float64:
        descr.format = driver.array_format.SIGNED_INT32
        descr.num_channels = 2
    else:
        descr.format = driver.dtype_to_array_format(np_array.dtype)
        descr.num_channels = 1
    descr.flags = 0
    device_array = driver.Array(descr)
    copy = driver.Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d
    copy()
    return device_array
Example #7
0
def numpy3d_to_array(np_array, allow_surface_bind=True):

    import pycuda.autoinit

    d, h, w = np_array.shape

    descr = drv.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = drv.dtype_to_array_format(np_array.dtype)
    descr.num_channels = 1
    descr.flags = 0

    if allow_surface_bind:
        descr.flags = drv.array3d_flags.SURFACE_LDST

    device_array = drv.Array(descr)

    copy = drv.Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d

    copy()

    return device_array
Example #8
0
    def copyTexture(self, data, pixelSize=None, extent=None, isocenter=None):
        # Convert data to float32 array in Contiguous ordering.
        self.arrIn = np.array(data, dtype=np.float32, order='C')
        d, h, w = self.arrIn.shape

        descr = cuda.ArrayDescriptor3D()
        descr.width = w
        descr.height = h
        descr.depth = d
        descr.format = cuda.dtype_to_array_format(self.arrIn.dtype)
        descr.num_channels = 1
        # descr.flags = 0
        self.gpuTexture = cuda.Array(descr)

        # Copy array data across. This puts a 3D array in linear memory on the GPU.
        copy = cuda.Memcpy3D()
        copy.set_src_host(self.arrIn)
        copy.set_dst_array(self.gpuTexture)
        copy.src_pitch = self.arrIn.strides[1]
        copy.width_in_bytes = self.arrIn.strides[1]
        copy.height = h
        copy.depth = d
        copy.src_height = h
        copy()

        self.pixelSize = pixelSize
        self.isocenter = isocenter
        # Extent[l,r,b,t,f,b]
        self.extent = extent
        # Trigger for setting bottom left corner as 0,0,0.
        self.zeroExtent = False
Example #9
0
def numpy3d_to_array(np_array, allow_surface_bind=True):
    """Converts 3D numpy array to 3D device array.
    """
    # numpy3d_to_array
    # taken from pycuda mailing list (striped for C ordering only)

    d, h, w = np_array.shape

    descr = cuda.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = cuda.dtype_to_array_format(np_array.dtype)
    descr.num_channels = 1
    descr.flags = 0

    if allow_surface_bind:
        descr.flags = cuda.array3d_flags.SURFACE_LDST

    device_array = cuda.Array(descr)

    copy = cuda.Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d

    copy()

    return device_array
Example #10
0
 def __init__(self):
     self.d_params = cuda.mem_alloc(
             self.ntemporal_samples * DevSrc.max_params * 4)
     self.palette_surf_dsc = util.argset(cuda.ArrayDescriptor3D(),
             height=self.palette_height, width=self.palette_width, depth=0,
             format=cuda.array_format.SIGNED_INT32,
             num_channels=2, flags=cuda.array3d_flags.SURFACE_LDST)
     self.d_pal_array = cuda.Array(self.palette_surf_dsc)
Example #11
0
def create_2D_array(mat):
    descr = driver.ArrayDescriptor()
    descr.width = mat.shape[1]
    descr.height = mat.shape[0]
    descr.format = driver.dtype_to_array_format(mat.dtype)
    descr.num_channels = 1
    descr.flags = 0
    ary = driver.Array(descr)
    return ary
Example #12
0
    def test_3d_texture(self):
        # adapted from code by Nicolas Pinto
        w = 2
        h = 4
        d = 8
        shape = (w, h, d)

        a = numpy.asarray(numpy.random.randn(*shape),
                          dtype=numpy.float32,
                          order="F")

        descr = drv.ArrayDescriptor3D()
        descr.width = w
        descr.height = h
        descr.depth = d
        descr.format = drv.dtype_to_array_format(a.dtype)
        descr.num_channels = 1
        descr.flags = 0

        ary = drv.Array(descr)

        copy = drv.Memcpy3D()
        copy.set_src_host(a)
        copy.set_dst_array(ary)
        copy.width_in_bytes = copy.src_pitch = a.strides[1]
        copy.src_height = copy.height = h
        copy.depth = d

        copy()

        mod = SourceModule("""
        texture<float, 3, cudaReadModeElementType> mtx_tex;

        __global__ void copy_texture(float *dest)
        {
          int x = threadIdx.x;
          int y = threadIdx.y;
          int z = threadIdx.z;
          int dx = blockDim.x;
          int dy = blockDim.y;
          int i = (z*dy + y)*dx + x;
          dest[i] = tex3D(mtx_tex, x, y, z);
          //dest[i] = x;
        }
        """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        mtx_tex.set_array(ary)

        dest = numpy.zeros(shape, dtype=numpy.float32, order="F")
        copy_texture(drv.Out(dest), block=shape, texrefs=[mtx_tex])
        assert la.norm(dest - a) == 0
Example #13
0
    def alloc_coeff_arrays(s):
        f = np.zeros((s.nx, s.ny, s.nz), 'f')
        s.cex = np.ones_like(f) * 0.5
        s.cex[:, -1, :] = 0
        s.cex[:, :, -1] = 0
        s.cey = np.ones_like(f) * 0.5
        s.cey[:, :, -1] = 0
        s.cey[-1, :, :] = 0
        s.cez = np.ones_like(f) * 0.5
        s.cez[-1, :, :] = 0
        s.cez[:, -1, :] = 0

        descr = cuda.ArrayDescriptor3D()
        descr.width = s.nz
        descr.height = s.ny
        descr.depth = s.nx
        descr.format = cuda.dtype_to_array_format(f.dtype)
        descr.num_channels = 1
        descr.flags = 0
        s.tcex_gpu = cuda.Array(descr)
        s.tcey_gpu = cuda.Array(descr)
        s.tcez_gpu = cuda.Array(descr)

        mcpy = cuda.Memcpy3D()
        mcpy.width_in_bytes = mcpy.src_pitch = f.strides[1]
        mcpy.src_height = mcpy.height = s.ny
        mcpy.depth = s.nx
        mcpy.set_src_host(s.cex)
        mcpy.set_dst_array(s.tcex_gpu)
        mcpy()
        mcpy.set_src_host(s.cey)
        mcpy.set_dst_array(s.tcey_gpu)
        mcpy()
        mcpy.set_src_host(s.cez)
        mcpy.set_dst_array(s.tcez_gpu)
        mcpy()
Example #14
0
def gpuArray2DtocudaArray(gpuArray):
    #import pycuda.autoinit
    h, w = gpuArray.shape
    descr2D = cuda.ArrayDescriptor()
    descr2D.width = w
    descr2D.height = h
    descr2D.format = cuda.dtype_to_array_format(gpuArray.dtype)
    descr2D.num_channels = 1
    cudaArray = cuda.Array(descr2D)
    copy2D = cuda.Memcpy2D()
    copy2D.set_src_device(gpuArray.ptr)
    copy2D.set_dst_array(cudaArray)
    copy2D.src_pitch = gpuArray.strides[0]
    copy2D.width_in_bytes = copy2D.src_pitch = gpuArray.strides[0]
    copy2D.src_height = copy2D.height = h
    copy2D(aligned=True)
    return cudaArray, copy2D
Example #15
0
def np2DtoCudaArray(npArray, allowSurfaceBind=False):
    #import pycuda.autoinit
    h, w = npArray.shape
    descr2D = cuda.ArrayDescriptor()
    descr2D.width = w
    descr2D.height = h
    descr2D.format = cuda.dtype_to_array_format(npArray.dtype)
    descr2D.num_channels = 1
    if allowSurfaceBind:
        descr.flags = cuda.array3d_flags.SURFACE_LDST
    cudaArray = cuda.Array(descr2D)
    copy2D = cuda.Memcpy2D()
    copy2D.set_src_host(npArray)
    copy2D.set_dst_array(cudaArray)
    copy2D.src_pitch = npArray.strides[0]
    copy2D.width_in_bytes = copy2D.src_pitch = npArray.strides[0]
    copy2D.src_height = copy2D.height = h
    copy2D(aligned=True)
    return cudaArray, descr2D
def texCubeToGPU(texCube):
    descr = cuda.ArrayDescriptor3D()
    descr.width = texCube.shape[2]
    descr.height = texCube.shape[1]
    descr.depth = 6
    descr.format = cuda.dtype_to_array_format(texCube.dtype)
    descr.num_channels = 4
    descr.flags = cuda.array3d_flags.CUBEMAP

    texCubeArray = cuda.Array(descr)
    copy = cuda.Memcpy3D()
    copy.set_src_host(texCube)
    copy.set_dst_array(texCubeArray)
    copy.width_in_bytes = copy.src_pitch = texCube.strides[1]  #d*h*w*c
    copy.src_height = copy.height = texCube.shape[1]
    copy.depth = 6

    copy()

    return texCubeArray
Example #17
0
def np3DtoCudaArray(npArray, allowSurfaceBind=False):
    #import pycuda.autoinit
    d, h, w = npArray.shape
    descr3D = cuda.ArrayDescriptor3D()
    descr3D.width = w
    descr3D.height = h
    descr3D.depth = d
    descr3D.format = cuda.dtype_to_array_format(npArray.dtype)
    descr3D.num_channels = 1
    descr3D.flags = 0
    if allowSurfaceBind:
        descr3D.flags = cuda.array3d_flags.SURFACE_LDST
    cudaArray = cuda.Array(descr3D)
    copy3D = cuda.Memcpy3D()
    copy3D.set_src_host(npArray)
    copy3D.set_dst_array(cudaArray)
    copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[1]
    copy3D.src_height = copy3D.height = h
    copy3D.depth = d
    copy3D()
    return cudaArray, copy3D
Example #18
0
    def _prepare_F_texture(self):

        descr = drv.ArrayDescriptor3D()
        descr.width = self.side
        descr.height = self.side
        descr.depth = self.side
        descr.format = drv.dtype_to_array_format(self.F_gpu.dtype)
        descr.num_channels = 1
        descr.flags = 0

        F_array = drv.Array(descr)

        copy = drv.Memcpy3D()
        copy.set_src_device(self.F_gpu.gpudata)
        copy.set_dst_array(F_array)
        copy.width_in_bytes = copy.src_pitch = self.F_gpu.strides[1]
        copy.src_height = copy.height = self.side
        copy.depth = self.side

        self.F_gpu_to_array_copy = copy
        self.F_gpu_to_array_copy()
        self.F_texture.set_array(F_array)
Example #19
0
def gpu3D(src):
    """
    """
    w, h, d = src.shape
    descr = cuda.ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = cuda.dtype_to_array_format(src.dtype)
    descr.num_channels = 1
    descr.flags = 0
    dst = cuda.Array(descr)

    copy = cuda.Memcpy3D()
    copy.set_src_host(src)
    copy.set_dst_array(dst)
    copy.width_in_bytes = copy.src_pitch = src.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d
    copy()

    return dst
hx_gpu = cuda.to_device(f)
hy_gpu = cuda.to_device(f)
hz_gpu = cuda.to_device(f)

cex = set_c(f, 'yz')
cey = set_c(f, 'zx')
cez = set_c(f, 'xy')

descr = cuda.ArrayDescriptor3D()
descr.width = nz
descr.height = ny
descr.depth = nx
descr.format = cuda.dtype_to_array_format(f.dtype)
descr.num_channels = 1
descr.flags = 0
tcex_gpu = cuda.Array(descr)
tcey_gpu = cuda.Array(descr)
tcez_gpu = cuda.Array(descr)

mcopy = cuda.Memcpy3D()
mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
mcopy.src_height = mcopy.height = ny
mcopy.depth = nx
arrcopy(mcopy, cex, tcex_gpu)
arrcopy(mcopy, cey, tcey_gpu)
arrcopy(mcopy, cez, tcez_gpu)

# prepare kernels
from pycuda.compiler import SourceModule
mod = SourceModule(
    kernels.replace('Dx', str(Dx)).replace('Dy', str(Dy)).replace(
    f = np.zeros((nx, ny, nz), 'f')
    g = np.zeros((nx, ny, nz), 'f')
    cf = np.random.randn(nx * ny * nz).astype(np.float32).reshape((nx, ny, nz))

    f_gpu = cuda.to_device(f)
    g_gpu = cuda.to_device(f)
    cf_gpu = cuda.to_device(cf)

    descr = cuda.ArrayDescriptor3D()
    descr.width = nz
    descr.height = ny
    descr.depth = nx
    descr.format = cuda.dtype_to_array_format(f.dtype)
    descr.num_channels = 1
    descr.flags = 0
    tcf_gpu = cuda.Array(descr)

    mcopy = cuda.Memcpy3D()
    mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
    mcopy.src_height = mcopy.height = ny
    mcopy.depth = nx
    arrcopy(mcopy, cf, tcf_gpu)

    # prepare kernels
    from pycuda.compiler import SourceModule
    kernels = kernels.replace('Dx', '16').replace('Dy', '16').replace(
        'nyz',
        str(ny * nz)).replace('nx',
                              str(nx)).replace('ny',
                                               str(ny)).replace('nz', str(nz))
    mod = SourceModule(kernels)
Example #22
0
def np3DtoCudaArray(npArray, prec, order = "C", allowSurfaceBind=False):
  ''' Some parameters like stride are explained in PyCUDA: driver.py test_driver.py gpuarray.py'''
  # For 1D-2D Cuda Arrays the descriptor is the same just puttin LAYERED flags
#   if order != "C": raise LogicError("Just implemented for C order")
  dimension = len(npArray.shape)
  case = order in ["C","F"]
  if not case:
    raise LogicError("order must be either F or C")
#   if dimension == 1:
#       w = npArray.shape[0]
#       h, d = 0,0
  if dimension == 2:
      if order == "C": stride = 0
      if order == "F": stride = -1
      h, w = npArray.shape
      d = 1
      if allowSurfaceBind:
        descrArr = cuda.ArrayDescriptor3D()
        descrArr.width = w
        descrArr.height = h
        descrArr.depth = d
      else:
        descrArr = cuda.ArrayDescriptor()
        descrArr.width = w
        descrArr.height = h
#         descrArr.depth = d
  elif dimension == 3:
      if order == "C": stride = 1
      if order == "F": stride = 1
      d, h, w = npArray.shape
      descrArr = cuda.ArrayDescriptor3D()
      descrArr.width = w
      descrArr.height = h
      descrArr.depth = d
  else:
      raise LogicError("CUDArray dimesnsion 2 and 3 supported at the moment ... ")
  if prec == 'float':
    descrArr.format = cuda.dtype_to_array_format(npArray.dtype)
    descrArr.num_channels = 1
  elif prec == 'cfloat': # Hack for complex 64 = (float 32, float 32) == (re,im)
    descrArr.format = cuda.array_format.SIGNED_INT32 # Reading data as int2 (hi=re,lo=im) structure
    descrArr.num_channels = 2
  elif prec == 'double': # Hack for doubles
    descrArr.format = cuda.array_format.SIGNED_INT32 # Reading data as int2 (hi,lo) structure
    descrArr.num_channels = 2
  elif prec == 'cdouble': # Hack for doubles
    descrArr.format = cuda.array_format.SIGNED_INT32 # Reading data as int4 (re=(hi,lo),im=(hi,lo)) structure
    descrArr.num_channels = 4
  else:
    descrArr.format = cuda.dtype_to_array_format(npArray.dtype)
    descrArr.num_channels = 1

  if allowSurfaceBind:
    if dimension==2:  descrArr.flags |= cuda.array3d_flags.ARRAY3D_LAYERED
    descrArr.flags |= cuda.array3d_flags.SURFACE_LDST

  cudaArray = cuda.Array(descrArr)
  if allowSurfaceBind or dimension==3 :
    copy3D = cuda.Memcpy3D()
    copy3D.set_src_host(npArray)
    copy3D.set_dst_array(cudaArray)
    copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[stride]
#     if dimension==3: copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[1] #Jut C order support
#     if dimension==2: copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[0] #Jut C order support
    copy3D.src_height = copy3D.height = h
    copy3D.depth = d
    copy3D()
    return cudaArray, copy3D
  else:
#     if dimension == 3:
#       copy3D = cuda.Memcpy3D()
#       copy3D.set_src_host(npArray)
#       copy3D.set_dst_array(cudaArray)
#       copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[stride]
# #       if dimension==3: copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[1] #Jut C order support
# #       if dimension==2: copy3D.width_in_bytes = copy3D.src_pitch = npArray.strides[0] #Jut C order support
#       copy3D.src_height = copy3D.height = h
#       copy3D.depth = d
#       copy3D()
#       return cudaArray, copy3D
#     if dimension == 2:
      cudaArray = cuda.Array(descrArr)
      copy2D = cuda.Memcpy2D()
      copy2D.set_src_host(npArray)
      copy2D.set_dst_array(cudaArray)
      copy2D.width_in_bytes = copy2D.src_pitch = npArray.strides[stride]
#       copy2D.width_in_bytes = copy2D.src_pitch = npArray.strides[0] #Jut C order support
      copy2D.src_height = copy2D.height = h
      copy2D(aligned=True)
      return cudaArray, copy2D
    ey_gpu = cuda.to_device(f)
    ez_gpu = cuda.to_device(f)
    hx_gpu = cuda.to_device(f)
    hy_gpu = cuda.to_device(f)
    hz_gpu = cuda.to_device(f)

    # memory allocate with cuda array
    descr = cuda.ArrayDescriptor3D()
    descr.width = nx
    descr.height = ny
    descr.depth = nz
    descr.format = cuda.dtype_to_array_format(f.dtype)
    descr.num_channels = 1
    descr.flags = 0

    cex_gpu = cuda.Array(descr)
    cey_gpu = cuda.Array(descr)
    cez_gpu = cuda.Array(descr)
    chx_gpu = cuda.Array(descr)
    chy_gpu = cuda.Array(descr)
    chz_gpu = cuda.Array(descr)

    mcopy = cuda.Memcpy3D()
    mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
    mcopy.src_height = mcopy.height = ny
    mcopy.depth = nz

    arrcopy(mcopy, set_c(f, (None, -1, -1)), cex_gpu)
    arrcopy(mcopy, set_c(f, (-1, None, -1)), cey_gpu)
    arrcopy(mcopy, set_c(f, (-1, -1, None)), cez_gpu)
    arrcopy(mcopy, set_c(f, (None, 0, 0)), chx_gpu)
Example #24
0
d = 4
shape = (d, h, w)

a = np.arange(24).reshape(*shape, order='C').astype('float32')
print(a.shape, a.strides)
print(a)

descr = drv.ArrayDescriptor3D()
descr.width = w
descr.height = h
descr.depth = d
descr.format = drv.dtype_to_array_format(a.dtype)
descr.num_channels = 1
descr.flags = 0

ary = drv.Array(descr)

copy = drv.Memcpy3D()
copy.set_src_host(a)
copy.set_dst_array(ary)
copy.width_in_bytes = copy.src_pitch = a.strides[1]
copy.src_height = copy.height = h
copy.depth = d

copy()

mod = SourceModule("""
    texture<float, 3, cudaReadModeElementType> mtx_tex;

    __global__ void copy_texture(float *dest)
    {