def __init__(self, GPUContext, CommandQueue, numElements):
    """Set up the OpenCL resources used by the scan.

    Stores the context, command queue and element count, allocates the
    intermediate device buffer when the input is larger than one
    work-group scan, builds ``Scan_b.cl`` and keeps handles to its three
    kernels.

    Parameters
    ----------
    GPUContext
        Context handed to ``cl.Buffer`` and ``cl.Program`` (presumably a
        ``pyopencl.Context`` -- confirm against callers).
    CommandQueue
        Stored as ``self.cqCommandQueue``; not otherwise used here.
    numElements
        Number of elements; stored as ``self.mNumElements`` and used to
        size ``self.d_Buffer``.

    Notes
    -----
    ``self.d_Buffer`` is only created when ``numElements`` exceeds 1024;
    for smaller inputs the attribute is left unset.
    """
    # Constants
    MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE = 1024
    self.WORKGROUP_SIZE = 256
    self.MAX_BATCH_ELEMENTS = 64 * 1048576  # 64 * numElements
    self.MIN_SHORT_ARRAY_SIZE = 4
    self.MAX_SHORT_ARRAY_SIZE = 4 * self.WORKGROUP_SIZE
    self.MIN_LARGE_ARRAY_SIZE = 8 * self.WORKGROUP_SIZE
    self.MAX_LARGE_ARRAY_SIZE = (
        4 * self.WORKGROUP_SIZE * self.WORKGROUP_SIZE
    )
    # Size in bytes of one unsigned 32-bit integer.
    self.size_uint = size_uint = np.uint32(0).nbytes

    # OpenCL elements
    self.cxGPUContext = GPUContext
    self.cqCommandQueue = CommandQueue
    self.mNumElements = numElements

    mf = cl.mem_flags
    if numElements > MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE:
        # `np.int` was merely an alias of the builtin `int` and has been
        # removed from NumPy, so the builtin is used directly.
        # NOTE(review): `/` is kept exactly as written; under Python 3 it
        # is true division -- confirm whether floor division was intended.
        buffer_nbytes = int(
            numElements / MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE * size_uint
        )
        self.d_Buffer = cl.Buffer(
            self.cxGPUContext, mf.READ_WRITE, buffer_nbytes
        )

    # Program
    src_file = clu.get_pysph_root() + '/base/Scan_b.cl'
    # Context manager so the source file is closed deterministically.
    with open(src_file) as src_handle:
        src = src_handle.read()
    cpProgram = cl.Program(self.cxGPUContext, src).build()

    # Kernel
    self.ckScanExclusiveLocal1 = cpProgram.scanExclusiveLocal1
    self.ckScanExclusiveLocal2 = cpProgram.scanExclusiveLocal2
    self.ckUniformUpdate = cpProgram.uniformUpdate
def _create_program(self):
    """Read the OpenCL kernel file and build.

    Loads ``/base/RadixSortVal.cl`` from under the root returned by
    ``clu.get_pysph_root()``, builds it against ``self.context`` and
    stores the built program on ``self.program``.
    """
    src_file = clu.get_pysph_root() + '/base/RadixSortVal.cl'
    # Context manager so the file handle is closed even if reading fails.
    with open(src_file) as src_handle:
        src = src_handle.read()
    self.program = cl.Program(self.context, src).build()
def _setup_program(self):
    """Build the OpenCL program from ``linked_list.cl``.

    The kernel source is obtained through ``cl_read`` with
    ``precision=self.cl_precision``, built against ``self.context`` and
    the result is stored on ``self.prog``.
    """
    kernel_path = get_pysph_root() + '/base/linked_list.cl'
    kernel_source = cl_read(kernel_path, precision=self.cl_precision)

    unbuilt_program = cl.Program(self.context, kernel_source)
    self.prog = unbuilt_program.build()