Example #1
0
    def test_2d_fp_surfaces(self):
        """Round-trip random 2D arrays through a layered CUDA surface.

        For every dtype in the list, a kernel first writes the device array
        into the surface at (row, col) and then reads it back at (col, row)
        into the same device buffer.  The buffer must then equal the
        transpose of the array that was originally uploaded.
        """
        orden = "C"
        npoints = 32

        # Kernel template; 'cuPres' is replaced by the C type name per dtype.
        kernel_template = '''
            #include <pycuda-helpers.hpp>

            surface<void, cudaSurfaceType2DLayered> mtx_tex;

            __global__ void copy_texture(cuPres *dest, int rw)
            {
              int row   = blockIdx.x*blockDim.x + threadIdx.x;
              int col   = blockIdx.y*blockDim.y + threadIdx.y;
              int layer = 1;
              int tid = row + col*blockDim.x*gridDim.x ;
              if (rw==0){
              cuPres aux = dest[tid];
              fp_surf2DLayeredwrite(aux, mtx_tex, row, col, layer,cudaBoundaryModeClamp);}
              else {
              cuPres aux = 0;
              fp_surf2DLayeredread(&aux, mtx_tex, col, row, layer, cudaBoundaryModeClamp);
              dest[tid] = aux;
              }
            }
            '''

        # dtypes whose helper name differs from their plain C type name.
        special_fp_names = {
            np.complex64: 'fp_tex_cfloat',
            np.complex128: 'fp_tex_cdouble',
            np.float64: 'fp_tex_double',
        }

        for prec in (np.int16, np.float32, np.float64,
                     np.complex64, np.complex128):
            ctype_name = dtype_to_ctype(prec)
            fp_name = special_fp_names.get(prec, ctype_name)

            # Random host data, cast to the dtype under test on assignment.
            host_in = np.zeros([npoints, npoints], order=orden, dtype=prec)
            host_in[:] = np.random.rand(npoints, npoints)[:]
            dev_data = gpuarray.to_gpu(host_in)

            # NOTE: the template contains no 'fpName' token, so this first
            # substitution currently leaves the source unchanged.
            source = kernel_template.replace('fpName', fp_name)
            source = source.replace('cuPres', ctype_name)
            module = SourceModule(source)

            copy_texture = module.get_function("copy_texture")
            mtx_tex = module.get_surfref("mtx_tex")

            block = (8, 8, 1)
            if block[0] > npoints:
                block = (npoints, npoints, 1)
            # Ceiling division of npoints by the block extent in x and y.
            grid = tuple(
                npoints // extent + (1 if npoints % extent else 0)
                for extent in block[:2]
            ) + (1,)

            copy_texture.prepare('Pi')

            # The surface is backed by a CUDA array built from a zeroed copy.
            surface_init = gpuarray.zeros_like(dev_data)
            cuda_array = drv.gpuarray_to_array(
                surface_init, orden, allowSurfaceBind=True)
            host_ref = dev_data.get()  # reference copy of the uploaded data
            mtx_tex.set_array(cuda_array)

            # rw == 0: write the buffer into the surface.
            copy_texture.prepared_call(
                grid, block, dev_data.gpudata, np.int32(0))
            # rw == 1: read it back with row/col swapped.
            copy_texture.prepared_call(
                grid, block, dev_data.gpudata, np.int32(1))

            readback = dev_data.get()
            mismatch = np.sum(np.abs(readback - np.transpose(host_ref)))
            assert mismatch == np.array(0, dtype=prec)
            dev_data.gpudata.free()
Example #2
0
    def test_2d_fp_surfaces(self):
        """Round-trip random 2D arrays through a layered CUDA surface.

        For every dtype in the list, a kernel first writes the device array
        into the surface at (row, col) and then reads it back at (col, row)
        into the same device buffer.  The buffer must then equal the
        transpose of the array that was originally uploaded.
        """
        orden = "C"
        npoints = 32

        # Kernel template; 'cuPres' is replaced by the C type name per dtype.
        kernel_template = '''
            #include <pycuda-helpers.hpp>

            surface<void, cudaSurfaceType2DLayered> mtx_tex;

            __global__ void copy_texture(cuPres *dest, int rw)
            {
              int row   = blockIdx.x*blockDim.x + threadIdx.x;
              int col   = blockIdx.y*blockDim.y + threadIdx.y;
              int layer = 1;
              int tid = row + col*blockDim.x*gridDim.x ;
              if (rw==0){
              cuPres aux = dest[tid];
              fp_surf2DLayeredwrite(aux, mtx_tex, row, col, layer,cudaBoundaryModeClamp);}
              else {
              cuPres aux = 0;
              fp_surf2DLayeredread(&aux, mtx_tex, col, row, layer, cudaBoundaryModeClamp);
              dest[tid] = aux;
              }
            }
            '''

        # dtypes whose helper name differs from their plain C type name.
        special_fp_names = {
            np.complex64: 'fp_tex_cfloat',
            np.complex128: 'fp_tex_cdouble',
            np.float64: 'fp_tex_double',
        }

        for prec in (np.int16, np.float32, np.float64,
                     np.complex64, np.complex128):
            ctype_name = dtype_to_ctype(prec)
            fp_name = special_fp_names.get(prec, ctype_name)

            # Random host data, cast to the dtype under test on assignment.
            host_in = np.zeros([npoints, npoints], order=orden, dtype=prec)
            host_in[:] = np.random.rand(npoints, npoints)[:]
            dev_data = gpuarray.to_gpu(host_in)

            # NOTE: the template contains no 'fpName' token, so this first
            # substitution currently leaves the source unchanged.
            source = kernel_template.replace('fpName', fp_name)
            source = source.replace('cuPres', ctype_name)
            module = SourceModule(source)

            copy_texture = module.get_function("copy_texture")
            mtx_tex = module.get_surfref("mtx_tex")

            block = (8, 8, 1)
            if block[0] > npoints:
                block = (npoints, npoints, 1)
            # Ceiling division of npoints by the block extent in x and y.
            grid = tuple(
                npoints // extent + (1 if npoints % extent else 0)
                for extent in block[:2]
            ) + (1,)

            copy_texture.prepare('Pi')

            # The surface is backed by a CUDA array built from a zeroed copy.
            surface_init = gpuarray.zeros_like(dev_data)
            cuda_array = drv.gpuarray_to_array(
                surface_init, orden, allowSurfaceBind=True)
            host_ref = dev_data.get()  # reference copy of the uploaded data
            mtx_tex.set_array(cuda_array)

            # rw == 0: write the buffer into the surface.
            copy_texture.prepared_call(
                grid, block, dev_data.gpudata, np.int32(0))
            # rw == 1: read it back with row/col swapped.
            copy_texture.prepared_call(
                grid, block, dev_data.gpudata, np.int32(1))

            readback = dev_data.get()
            mismatch = np.sum(np.abs(readback - np.transpose(host_ref)))
            assert mismatch == np.array(0, dtype=prec)
            dev_data.gpudata.free()
Example #3
0
  if (x < Nx && y < Ny && z < Nz) {
    float value = tex3D(tex_in, (float) x, (float) y, float (z));

    surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero);
  }

}
'''

# Compile the CUDA source held in `src_module`, with caching and
# keeping of intermediate files both switched off.
mod = SourceModule(src_module, cache_dir=False, keep=False)

# Handle to the kernel under test.
kernel = mod.get_function("test_3d_surf")
# Three int32 arguments -- presumably the Nx, Ny, Nz extents the kernel
# compares against; confirm where arg_types is consumed.
arg_types = (np.int32, np.int32, np.int32)

# Texture reference the kernel reads from and surface reference it writes to.
tex_in = mod.get_texref('tex_in')
surf_out = mod.get_surfref('surf_out')

# Random shape: each extent is drawn from [1, 255) (upper bound exclusive).
shape_x = np.random.randint(1, 255)
shape_y = np.random.randint(1, 255)
shape_z = np.random.randint(1, 255)

dtype = np.float32  # should match src_module's datatype

# Input volume indexed (z, y, x), filled with normally distributed values.
numpy_array_in = np.random.randn(shape_z, shape_y,
                                 shape_x).astype(dtype).copy()
# Convert the input to a CUDA array and bind it to the texture reference.
cuda_array_in = numpy3d_to_array(numpy_array_in)
tex_in.set_array(cuda_array_in)

# Zero-filled array of the same shape/dtype, converted with surface binding
# enabled so it can serve as the output target.
zeros = np.zeros_like(numpy_array_in)
cuda_array_out = numpy3d_to_array(zeros, allow_surface_bind=True)
Example #4
0
# Look up the kernel entry points by name in the compiled module `cudaCode`.
getAlphas = cudaCode.get_function( "getAlphas_kernel" )
getFFTderivatives = cudaCode.get_function( "getFFTderivatives_kernel" ) #V_FFT
getPartialsXY = cudaCode.get_function( "getPartialsXY_kernel" )
setBoundryConditionsKernel = cudaCode.get_function( 'setBoundryConditions_kernel' )
implicitStep1 = cudaCode.get_function( "implicitStep1_kernel" )
implicitStep2 = cudaCode.get_function( "implicitStep2_kernel" )
findActivityKernel = cudaCode.get_function( "findActivity_kernel" )
getActivityKernel = cudaCode.get_function( "getActivity_kernel" )
getVelocityKernel = cudaCode.get_function( "getVelocity_kernel" )
eulerStepKernel = cudaCode.get_function( "eulerStep_kernel" )
eulerStep_FFTKernel = cudaCode.get_function( "eulerStep_fft_kernel" )  ##V_FFT
#TEXTURE version
eulerStep_textKernel = cudaCode.get_function( "eulerStep_texture_kernel" )
# Texture and surface references for the real and imaginary parts of psi.
tex_psiReal = cudaCode.get_texref("tex_psiReal")
tex_psiImag = cudaCode.get_texref("tex_psiImag")
surf_psiReal = cudaCode.get_surfref("surf_psiReal")
surf_psiImag = cudaCode.get_surfref("surf_psiImag")
# Optionally report memory usage of both Euler-step kernels.
# print("") emits the same blank line on Python 2 and is also valid Python 3.
if showKernelMemInfo:
  kernelMemoryInfo(eulerStepKernel, 'eulerStepKernel')
  print("")
  kernelMemoryInfo(eulerStep_textKernel, 'eulerStepKernel_texture')
  print("")

########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# Element-wise kernel: realArray[i] = a*realArray[i].  The "cudaP"
# placeholder in the argument list is replaced by the value of `cudaP`.
multiplyByScalarReal = ElementwiseKernel(
    arguments="cudaP a, cudaP *realArray".replace("cudaP", cudaP),
    operation="realArray[i] = a*realArray[i] ",
    name="multiplyByScalarReal_kernel",
)
########################################################################
multiplyByScalarComplex = ElementwiseKernel(arguments="cudaP a, pycuda::complex<cudaP> *psi".replace("cudaP", cudaP),
Example #5
0
# Block and grid dimension tuples (x, y, z) built from values defined earlier.
block3D = (block_size_x, block_size_y, block_size_z)
grid3D = (gridx, gridy, gridz)
# Same grid with the x extent halved (integer division).
grid3D_ising = (gridx//2, gridy, gridz)


#initialize pyCUDA context

cudaDevice = setCudaDevice( devN=useDevice, usingAnimation=True )

#Read and compile CUDA code
# print(...) with one argument behaves the same on Python 2 and Python 3.
print("\nCompiling CUDA code")
# The with-block closes the source file as soon as it has been read.
with open("CUDAising3D.cu", "r") as cudaSourceFile:
  cudaCodeString_raw = cudaSourceFile.read()
cudaCodeString = cudaCodeString_raw # % { "BLOCK_WIDTH":block2D[0], "BLOCK_HEIGHT":block2D[1], "BLOCK_DEPTH":block2D[2], }
cudaCode = SourceModule(cudaCodeString)
# Texture/surface references and the kernel handle from the compiled module.
tex_spins = cudaCode.get_texref('tex_spinsIn')
surf_spins = cudaCode.get_surfref('surf_spinsOut')
isingKernel = cudaCode.get_function('ising_kernel')
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# Element-wise kernel: output[i] = a*input[i] + b (int input, float output).
changeIntToFloat = ElementwiseKernel(
    arguments="float a, float b, int *input, float *output",
    operation="output[i] = a*input[i] + b;",
    name="intToFloat_kernel",
)
########################################################################
# Element-wise kernel: output[i] = (unsigned char)(-255*(input[i]-1)).
floatToUchar = ElementwiseKernel(
    arguments="float *input, unsigned char *output",
    operation="output[i] = (unsigned char) ( -255*(input[i]-1));",
    name="floatToUchar_kernel",
)
########################################################################
def sendToScreen( plotData ):
  # Runs the floatToUchar kernel from plotDataFloat_d into plotData_d (both
  # are non-local names), then calls copyToScreenArray().
  # NOTE(review): the `plotData` parameter is not used in the lines visible
  # here -- confirm whether that is intentional.
  floatToUchar( plotDataFloat_d, plotData_d )
  copyToScreenArray()
Example #6
0
  if (x < Nx && y < Ny && z < Nz) {
    float value = tex3D(tex_in, (float) x, (float) y, float (z));

    surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero);
  }

}
'''

# Compile the CUDA source held in `src_module`, with caching and
# keeping of intermediate files both switched off.
mod=SourceModule(src_module, cache_dir=False, keep=False)

# Handle to the kernel under test.
kernel=mod.get_function("test_3d_surf")
# Three int32 arguments -- presumably the Nx, Ny, Nz extents the kernel
# compares against; confirm where arg_types is consumed.
arg_types = (np.int32, np.int32, np.int32)

# Texture reference the kernel reads from and surface reference it writes to.
tex_in=mod.get_texref('tex_in')
surf_out=mod.get_surfref('surf_out')

# Random shape: each extent is drawn from [1, 255) (upper bound exclusive).
shape_x = np.random.randint(1,255)
shape_y = np.random.randint(1,255)
shape_z = np.random.randint(1,255)

dtype=np.float32 # should match src_module's datatype

# Input volume indexed (z, y, x), filled with normally distributed values.
numpy_array_in=np.random.randn(shape_z, shape_y, shape_x).astype(dtype).copy()
# Convert the input to a CUDA array and bind it to the texture reference.
cuda_array_in = numpy3d_to_array(numpy_array_in)
tex_in.set_array(cuda_array_in)

# Zero-filled array of the same shape/dtype, converted with surface binding
# enabled and then bound to the output surface reference.
zeros=np.zeros_like(numpy_array_in)
cuda_array_out = numpy3d_to_array(zeros,allow_surface_bind=True)
surf_out.set_array(cuda_array_out)
Example #7
0
# Block and grid dimension tuples (x, y, z) built from values defined earlier.
block3D = (block_size_x, block_size_y, block_size_z)
grid3D = (gridx, gridy, gridz)
# Same grid with the x extent halved (integer division).
grid3D_ising = (gridx//2, gridy, gridz)


#initialize pyCUDA context

cudaDevice = setCudaDevice( devN=useDevice, usingAnimation=True )

#Read and compile CUDA code
# print(...) with one argument behaves the same on Python 2 and Python 3.
print("\nCompiling CUDA code")
# The with-block closes the source file as soon as it has been read.
with open("CUDAising3D.cu", "r") as cudaSourceFile:
  cudaCodeString_raw = cudaSourceFile.read()
cudaCodeString = cudaCodeString_raw # % { "BLOCK_WIDTH":block2D[0], "BLOCK_HEIGHT":block2D[1], "BLOCK_DEPTH":block2D[2], }
cudaCode = SourceModule(cudaCodeString)
# Texture/surface references and the kernel handle from the compiled module.
tex_spins = cudaCode.get_texref('tex_spinsIn')
surf_spins = cudaCode.get_surfref('surf_spinsOut')
isingKernel = cudaCode.get_function('ising_kernel')
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# Element-wise kernel: output[i] = a*input[i] + b (int input, float output).
changeIntToFloat = ElementwiseKernel(
    arguments="float a, float b, int *input, float *output",
    operation="output[i] = a*input[i] + b;",
    name="intToFloat_kernel",
)
########################################################################
# Element-wise kernel: output[i] = (unsigned char)(-255*(input[i]-1)).
floatToUchar = ElementwiseKernel(
    arguments="float *input, unsigned char *output",
    operation="output[i] = (unsigned char) ( -255*(input[i]-1));",
    name="floatToUchar_kernel",
)
########################################################################
def sendToScreen( plotData ):
  # Runs the floatToUchar kernel from plotDataFloat_d into plotData_d (both
  # are non-local names), then calls copyToScreenArray().
  # NOTE(review): the `plotData` parameter is not used in the lines visible
  # here -- confirm whether that is intentional.
  floatToUchar( plotDataFloat_d, plotData_d )
  copyToScreenArray()
Example #8
0
# Block dimension tuple (x, y, z) built from values defined earlier.
block3D = (block_size_x, block_size_y, block_size_z)



#Initialize openGL
volumeRender.initGL()
#initialize pyCUDA context
cudaDevice = setCudaDevice(devN=useDevice, usingAnimation=True )

#Read and compile CUDA code
# print(...) with one argument behaves the same on Python 2 and Python 3.
print("Compiling CUDA code")
# The with-block closes the source file as soon as it has been read.
with open("CUDAheat3D.cu", "r") as cudaSourceFile:
  cudaCodeString_raw = cudaSourceFile.read()
# Fill the %(BLOCK_WIDTH)s-style placeholders in the source with the block extents.
cudaCodeString = cudaCodeString_raw % { "BLOCK_WIDTH":block3D[0], "BLOCK_HEIGHT":block3D[1], "BLOCK_DEPTH":block3D[2], }
cudaCode = SourceModule(cudaCodeString)
# Texture/surface references and the two Euler kernel handles.
tex_tempIn = cudaCode.get_texref("tex_tempIn")
surf_tempOut = cudaCode.get_surfref("surf_tempOut")
eulerKernel_tex = cudaCode.get_function("euler_kernel_texture" )
eulerKernel_shrd = cudaCode.get_function("euler_kernel_shared" )
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# Element-wise kernel: output[i] = input[i].
copyDtoD_float = ElementwiseKernel(
    arguments="float *input, float *output",
    operation="output[i] = input[i];",
)
########################################################################
# Element-wise kernel: output[i] = (unsigned char)(-255*(input[i]-1)).
floatToUchar = ElementwiseKernel(
    arguments="float *input, unsigned char *output",
    operation="output[i] = (unsigned char) ( -255*(input[i]-1));",
)
########################################################################
# Element-wise kernel: input[i] = a*input[i], in place.
multiplyByFloat = ElementwiseKernel(
    arguments="float a, float *input",
    operation="input[i] = a*input[i];",
)
########################################################################
def sendToScreen( plotData ):
Example #9
0
surface<void, 2> surf;
__global__ void kernel(int width, int height)
{
    // Calculate surface coordinates
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < 400 && y < 400) {
        float data = x / 400.f;
        // Write to output surface
        surf2Dwrite(data, surf, x*4, y);
    }
}
""")

# Kernel handle and surface reference from the compiled module `mod`.
kernel_function = mod.get_function('kernel')
surface_ref = mod.get_surfref('surf')
# surface_ref.set_array(Density.ping_array,0)
# Bind Density.ping_array to the surface reference.
# NOTE(review): in the lines visible here `Density` is only assigned further
# down (Density = Slab(...)); confirm it is already defined when this runs.
surface_ref.set_array(Density.ping_array)

def Program(fragment):
    """Create a 4-vertex gloo program and set its 'Position' attribute.

    The program is built from "vertex_passthrough.vert" and `fragment`
    (passed as the second positional argument to gloo.Program) and is
    returned with 'Position' set to the four (+/-1, +/-1) corners.
    """
    corners = [(-1, -1), (-1, +1), (+1, -1), (+1, +1)]
    quad = gloo.Program("vertex_passthrough.vert", fragment, count=4)
    quad['Position'] = corners
    return quad



# Slab built from the grid width/height, the literal 1 and gl.GL_LINEAR
# (presumably a component count and a filtering mode -- confirm against Slab).
Density = Slab(GridWidth, GridHeight, 1, gl.GL_LINEAR)

# Program created by Program() with "visualize.frag" as its fragment argument.
prog_visualize = Program("visualize.frag")

def ClearSurface(surface, v):
Example #10
0
  #'blockDim.x': block3D[0], 'blockDim.y': block3D[1], 'blockDim.z': block3D[2],
  #'gridDim.x': grid3D[0], 'gridDim.y': grid3D[1], 'gridDim.z': grid3D[2] }
# Compile the CUDA source and look up kernels, textures and surfaces by name.
cudaCode = SourceModule(cudaCodeString)
#setFlux_kernel = cudaCode.get_function('setFlux')
setInterFlux_hll_kernel = cudaCode.get_function('setInterFlux_hll')
getInterFlux_hll_kernel = cudaCode.get_function('getInterFlux_hll')
iterPoissonStep_kernel = cudaCode.get_function('iterPoissonStep')
getGravityForce_kernel = cudaCode.get_function('getGravityForce')
getBounderyPotential_kernel = cudaCode.get_function('getBounderyPotential')
reduceDensity_kernel = cudaCode.get_function('reduceDensity' )
# Five texture references (tex_1 .. tex_5).
tex_1 = cudaCode.get_texref("tex_1")
tex_2 = cudaCode.get_texref("tex_2")
tex_3 = cudaCode.get_texref("tex_3")
tex_4 = cudaCode.get_texref("tex_4")
tex_5 = cudaCode.get_texref("tex_5")
# Five surface references (surf_1 .. surf_5).
surf_1 = cudaCode.get_surfref("surf_1")
surf_2 = cudaCode.get_surfref("surf_2")
surf_3 = cudaCode.get_surfref("surf_3")
surf_4 = cudaCode.get_surfref("surf_4")
surf_5 = cudaCode.get_surfref("surf_5")
########################################################################
# Element-wise kernel:
#   psiUCHAR[i] = (unsigned char)(-255*(values[i]*normaliztion - 1)).
# The "cudaP" placeholder in the argument list is replaced by `cudaP`.
convertToUCHAR = ElementwiseKernel(
    arguments="cudaP normaliztion, cudaP *values, unsigned char *psiUCHAR".replace("cudaP", cudaP),
    operation="psiUCHAR[i] = (unsigned char) ( -255*( values[i]*normaliztion -1 ) );",
    name="sendModuloToUCHAR_kernel",
)
########################################################################
# Reduction kernel: minimum over i of
#   delta / (abs(cnsv_vel[i]/cnsv_rho[i]) + soundVel[i]),
# starting from the neutral element 1e6.
getTimeMin_kernel = ReductionKernel(
    np.dtype( cudaPre ),
    neutral="1e6",
    arguments=" float delta, cudaP* cnsv_rho, cudaP* cnsv_vel, float* soundVel".replace("cudaP", cudaP),
    map_expr=" delta / ( abs( cnsv_vel[i]/ cnsv_rho[i] ) +  soundVel[i]   )    ",
    reduce_expr="min(a,b)",
    name="getTimeMin_kernel",
)