# Read and compile CUDA code
print("\nCompiling CUDA code\n")
codeFiles = ["vector2D.h", "circle.h", "line.h", "cudaBillar.cu"]
# For every source file, write a "<name>T" copy in which the literal text
# "cudaP" is replaced by the value of cudaP. Context managers guarantee the
# file handles are closed even if reading or writing fails.
for fileName in codeFiles:
    with open(fileName, "r") as inFile:
        codeString = inFile.read().replace("cudaP", cudaP)
    with open(fileName + "T", "w") as outFile:
        outFile.write(codeString)
with open("cudaBillar.cuT", "r") as templateFile:
    cudaCodeStringTemp = templateFile.read()
# Fill in the named %-format placeholders of the kernel template.
cudaCodeString = cudaCodeStringTemp % {
    "nCIRCLES": nCircles,
    "nLINES": nLines,
    "THREADS_PER_BLOCK": block[0],
    "TIME_INDEX_MAX": maxTimeIndx,
}
cudaCode = SourceModule(
    cudaCodeString,
    no_extern_c=True,
    include_dirs=[currentDirectory, toolsDirectory],
)
mainKernel = cudaCode.get_function("main_kernel")
if showKernelMemInfo:
    kernelMemoryInfo(mainKernel, 'mainKernel')
    print("")
###########################################################################
###########################################################################
# Initialize Data
nData = particlesForPlot*collisionsForPlot
if not plotFinal:
    nData = 1
print("Initializing CUDA memory")
np.random.seed(int(time.time()))  # Change numpy random seed
initialFreeMemory = getFreeMemory(show=True)
# Every particle starts at the same position (0.49, 0.49).
initialPosX_h = 0.49*np.ones(nParticles).astype(cudaPre)
initialPosY_h = 0.49*np.ones(nParticles).astype(cudaPre)
# Random direction angle per particle in [-pi, pi); the velocity components
# are the cosine and sine of that angle.
initialTheta = 2*np.pi*np.random.rand(nParticles).astype(cudaPre) - np.pi
initialVelX_h = np.cos(initialTheta)
initialVelY_h = np.sin(initialTheta)
initialRegionX_h = np.zeros(nParticles).astype(np.int32)
# Look up the compiled kernels by name in the module held by cudaCode.
setBoundryConditionsKernel = cudaCode.get_function('setBoundryConditions_kernel')
implicitStep1 = cudaCode.get_function("implicitStep1_kernel")
implicitStep2 = cudaCode.get_function("implicitStep2_kernel")
findActivityKernel = cudaCode.get_function("findActivity_kernel")
getActivityKernel = cudaCode.get_function("getActivity_kernel")
getVelocityKernel = cudaCode.get_function("getVelocity_kernel")
eulerStepKernel = cudaCode.get_function("eulerStep_kernel")
eulerStep_FFTKernel = cudaCode.get_function("eulerStep_fft_kernel")  ##V_FFT
# TEXTURE version
eulerStep_textKernel = cudaCode.get_function("eulerStep_texture_kernel")
# Texture and surface references declared in the CUDA source.
tex_psiReal = cudaCode.get_texref("tex_psiReal")
tex_psiImag = cudaCode.get_texref("tex_psiImag")
surf_psiReal = cudaCode.get_surfref("surf_psiReal")
surf_psiImag = cudaCode.get_surfref("surf_psiImag")
if showKernelMemInfo:
    kernelMemoryInfo(eulerStepKernel, 'eulerStepKernel')
    print("")
    kernelMemoryInfo(eulerStep_textKernel, 'eulerStepKernel_texture')
    print("")
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# In-place scaling of an array by the scalar a; "cudaP" in the argument
# list is replaced by the value of cudaP.
multiplyByScalarReal = ElementwiseKernel(
    arguments="cudaP a, cudaP *realArray".replace("cudaP", cudaP),
    operation="realArray[i] = a*realArray[i] ",
    name="multiplyByScalarReal_kernel",
)
########################################################################
# Same in-place scaling for an array of pycuda::complex<cudaP> values.
multiplyByScalarComplex = ElementwiseKernel(
    arguments="cudaP a, pycuda::complex<cudaP> *psi".replace("cudaP", cudaP),
    operation="psi[i] = a*psi[i] ",
    name="multiplyByScalarComplex_kernel",
    preamble="#include <pycuda-complex.hpp>",
)
grid2D = (gridx, gridy, 1)
# NOTE(review): this is the SUM of the two grid dimensions, not their
# product -- confirm that is what the consumers of nBlocks expect.
nBlocks = grid2D[0] + grid2D[1]
# Read and compile CUDA code
print("\nCompiling CUDA code\n")
# Read the kernel source with a context manager so the file handle is
# closed, then replace the literal text "cudaP" by the value of cudaP.
with open("cudaPercolation2D.cu", "r") as cudaSourceFile:
    cudaCodeString_raw = cudaSourceFile.read().replace("cudaP", cudaP)
# Fill in the named %-format placeholders of the kernel source.
cudaCodeString = cudaCodeString_raw % {
    "THREADS_PER_BLOCK": block2D[0]*block2D[1],
    "B_WIDTH": block2D[0],
    "B_HEIGHT": block2D[1],
}
cudaCode = SourceModule(cudaCodeString)
mainKernel_tex = cudaCode.get_function("main_kernel_tex")
mainKernel_sh = cudaCode.get_function("main_kernel_shared")
findActivityKernel = cudaCode.get_function("findActivity_kernel")
getActivityKernel = cudaCode.get_function("getActivity_kernel")
tex_isFree = cudaCode.get_texref('tex_isFree')
tex_concentrationIn = cudaCode.get_texref('tex_concentrationIn')
if showKernelMemInfo:
    kernelMemoryInfo(mainKernel_tex, 'mainKernel_tex')
    print("")
    kernelMemoryInfo(mainKernel_sh, 'mainKernel_shared')
    print("")
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# NOTE(review): the kernel name below says "multiplyByScalarReal" although
# the operation is a log10 scaling plus an activity overlay -- looks like a
# copy-paste leftover; confirm before renaming.
scalePlotData = ElementwiseKernel(
    arguments="cudaP a, cudaP *realArray, unsigned char showActivity, unsigned char *activeThreads".replace("cudaP", cudaP),
    operation="realArray[i] = log10( 1 + (a*realArray[i] ) ) + activeThreads[i]*showActivity ; ",
    name="multiplyByScalarReal_kernel",
)
###########################################################################
nIter = 0


def oneIteration_tex():
    # Launches main_kernel_tex over the 2D grid with both texture
    # references bound.
    # NOTE(review): only this launch is visible in the reviewed excerpt;
    # confirm whether the original function continues (e.g. updates nIter).
    global nIter
    mainKernel_tex(
        np.int32(nWidth),
        np.int32(nHeight),
        cudaPre(hx),
        isFree_d,
        concentrationOut_d,
        grid=grid2D,
        block=block2D,
        texrefs=[tex_isFree, tex_concentrationIn],
    )
def stepFuntion():
    """Refresh the plot data from cnsv_d and advance the simulation one step.

    The maximum returned by reduction_max is used to scale cnsv_d (factor
    0.95/maxVal) when it is converted into plotData_d; the result is then
    copied to the screen array and timeStepHydro() is called.
    """
    # maxVal = ( gpuarray.max( cnsv1_d ) ).get()
    maxVal = reduction_max(cnsv_d, prePartialSum_d, partialSum_h, partialSum_d)
    # maxVal = 1.
    # convertToUCHAR_old( np.int32(0), np.int32(nCells), cudaPre( 0.95/maxVal ), cnsv1_d, plotData_d)
    convertToUCHAR(
        np.int32(0),
        np.int32(nCells),
        cudaPre(0.95/maxVal),
        cnsv_d,
        plotData_d,
        grid=grid1D,
        block=block1D,
    )
    copyToScreenArray()
    timeStepHydro()
    # if usingGravity: getGravForce()


########################################################################
if showKernelMemInfo:
    #kernelMemoryInfo( setFlux_kernel, 'setFlux_kernel')
    #print ""
    kernelMemoryInfo(setInterFlux_hll_kernel, 'setInterFlux_hll_kernel')
    print("")
    kernelMemoryInfo(getInterFlux_hll_kernel, 'getInterFlux_hll_kernel')
    print("")
    kernelMemoryInfo(iterPoissonStep_kernel, 'iterPoissonStep_kernel')
    print("")
    kernelMemoryInfo(getBounderyPotential_kernel, 'getBounderyPotential_kernel')
    print("")
    kernelMemoryInfo(reduceDensity_kernel, 'reduceDensity_kernel')
    print("")
########################################################################
########################################################################
print("\nInitializing Data")
initialMemory = getFreeMemory(show=True)
rho = np.zeros(X.shape, dtype=cudaPre)  # density
vx = np.zeros(X.shape, dtype=cudaPre)
grid3D = (gridx, gridy, gridz)
# Read and compile CUDA code
print("\nCompiling CUDA code\n")
# Read the kernel source with a context manager so the file handle is
# closed, then replace the literal text "cudaP" by the value of cudaP.
with open("cudaPercolation3D.cu", "r") as cudaSourceFile:
    cudaCodeString_raw = cudaSourceFile.read().replace("cudaP", cudaP)
# Fill in the named %-format placeholders of the kernel source.
cudaCodeString = cudaCodeString_raw % {
    "THREADS_PER_BLOCK": block3D[0]*block3D[1]*block3D[2],
    "B_WIDTH": block3D[0],
    "B_HEIGHT": block3D[1],
    "B_DEPTH": block3D[2],
}
cudaCode = SourceModule(cudaCodeString)
#countFreeNeighborsKernel = cudaCode.get_function("countFreeNeighbors_kernel")
mainKernel_tex = cudaCode.get_function("main_kernel_tex")
#mainKernel_sh = cudaCode.get_function("main_kernel_shared" )
tex_isFree = cudaCode.get_texref('tex_isFree')
#tex_nNeighb = cudaCode.get_texref('tex_nNeighb')
tex_concentrationIn = cudaCode.get_texref('tex_concentrationIn')
##surf_concentrationOut = cudaCode.get_surfref('surf_concentrationOut')
if showKernelMemInfo:
    kernelMemoryInfo(mainKernel_tex, 'mainKernel_tex')
    print("")
    #kernelMemoryInfo(mainKernel_sh, 'mainKernel_shared')
    #print ""
    #sys.exit()
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# In-place scaling of an array by the scalar a; "cudaP" in the argument
# list is replaced by the value of cudaP.
multiplyByScalarReal = ElementwiseKernel(
    arguments="cudaP a, cudaP *realArray".replace("cudaP", cudaP),
    operation="realArray[i] = a*realArray[i] ",
    name="multiplyByScalarReal_kernel",
)
###########################################################################
# Element-wise conversion output[i] = (unsigned char)(-255*(input[i]-1)).
floatToUchar = ElementwiseKernel(
    arguments="float *input, unsigned char *output",
    operation="output[i] = (unsigned char) ( -255*(input[i]-1));",
    name="floatToUchar_kernel",
)
########################################################################
# Read the kernel source with a context manager so the file handle is
# closed, then replace the literal text "cudaP" by the value of cudaP.
with open("cudaPercolation2D.cu", "r") as cudaSourceFile:
    cudaCodeString_raw = cudaSourceFile.read().replace("cudaP", cudaP)
# Fill in the named %-format placeholders of the kernel source.
cudaCodeString = cudaCodeString_raw % {
    "THREADS_PER_BLOCK": block2D[0] * block2D[1],
    "B_WIDTH": block2D[0],
    "B_HEIGHT": block2D[1],
}
cudaCode = SourceModule(cudaCodeString)
mainKernel_tex = cudaCode.get_function("main_kernel_tex")
mainKernel_sh = cudaCode.get_function("main_kernel_shared")
findActivityKernel = cudaCode.get_function("findActivity_kernel")
getActivityKernel = cudaCode.get_function("getActivity_kernel")
tex_isFree = cudaCode.get_texref('tex_isFree')
tex_concentrationIn = cudaCode.get_texref('tex_concentrationIn')
if showKernelMemInfo:
    kernelMemoryInfo(mainKernel_tex, 'mainKernel_tex')
    print("")
    kernelMemoryInfo(mainKernel_sh, 'mainKernel_shared')
    print("")
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# NOTE(review): the kernel name below says "multiplyByScalarReal" although
# the operation is a log10 scaling plus an activity overlay -- looks like a
# copy-paste leftover; confirm before renaming.
scalePlotData = ElementwiseKernel(
    arguments="cudaP a, cudaP *realArray, unsigned char showActivity, unsigned char *activeThreads".replace("cudaP", cudaP),
    operation="realArray[i] = log10( 1 + (a*realArray[i] ) ) + activeThreads[i]*showActivity ; ",
    name="multiplyByScalarReal_kernel",
)
###########################################################################
nIter = 0