def _init_gaussian(self, sigma):
    """Build the normalised gaussian weights for a given width on the device.

    The result is registered in ``self.cl_mem`` under the key
    ``"gaussian_<sigma>"`` and is also returned to the caller.

    When the work-group size recorded for the "gaussian" kernel is large
    enough, the weights are computed by that OpenCL kernel; otherwise they
    are computed with numpy on the host and uploaded. The host computation
    is::

        x = numpy.arange(size) - (size - 1.0) / 2.0
        gaussian = numpy.exp(-(x / sigma) ** 2 / 2.0).astype(numpy.float32)
        gaussian /= gaussian.sum(dtype=numpy.float32)

    :param sigma: width of the gaussian, the length of the function will be
                  8*sigma + 1 (the length comes from ``kernel_size(sigma, True)``)
    :return: the pyopencl array holding the float32 weights
    """
    # Let previously queued work complete before touching the queue again.
    pyopencl.enqueue_barrier(self.queue).wait()
    name = "gaussian_%s" % sigma
    size = kernel_size(sigma, True)
    wg_size = nextpower(size)

    logger.info("Allocating %s float for blur sigma: %s. wg=%s max_wg=%s",
                size, sigma, wg_size, self.block_size)
    allowed_wg = self.kernels_wg["gaussian"]
    fits_on_device = allowed_wg >= wg_size

    if not fits_on_device:
        # Host fallback: common bug on OSX when running on CPU
        logger.info("Workgroup size error: gaussian wg: %s < max_work_group_size: %s",
                    allowed_wg, self.block_size)
        positions = numpy.arange(size) - (size - 1.0) / 2.0
        weights = numpy.exp(-(positions / sigma) ** 2 / 2.0).astype(numpy.float32)
        weights /= weights.sum(dtype=numpy.float32)
        device_weights = pyopencl.array.to_device(self.queue, weights)
    else:
        device_weights = pyopencl.array.empty(self.queue, size, dtype=numpy.float32)
        pyopencl.enqueue_barrier(self.queue).wait()
        kernel = self.kernels.get_kernel("gaussian")
        # Two local scratch buffers of 4 * wg_size bytes each
        # (presumably one float32 per work-item -- confirm against the kernel).
        scratch_a = pyopencl.LocalMemory(4 * wg_size)
        scratch_b = pyopencl.LocalMemory(4 * wg_size)
        # Global size equals local size: a single work-group is launched.
        launch_shape = (wg_size,)
        event = kernel(self.queue, launch_shape, launch_shape,
                       device_weights.data,
                       numpy.float32(sigma),  # const float sigma
                       numpy.int32(size),  # const int SIZE
                       scratch_a, scratch_b)  # some shared memory
        pyopencl.enqueue_barrier(self.queue).wait()
        if self.profile:
            self.events.append(("gaussian %s" % sigma, event))

    self.cl_mem[name] = device_weights
    return device_weights
def _init_gaussian(self, sigma):
    """Create a device buffer containing a normalised gaussian of width sigma.

    The buffer is stored in ``self.cl_mem["gaussian_<sigma>"]`` and returned.
    It is filled by the "gaussian" OpenCL kernel when the work-group size
    registered for that kernel is at least the required one; otherwise the
    same values are computed on the CPU with numpy and sent to the device:

        x = numpy.arange(size) - (size - 1.0) / 2.0
        gaussian = numpy.exp(-(x / sigma) ** 2 / 2.0).astype(numpy.float32)
        gaussian /= gaussian.sum(dtype=numpy.float32)

    When ``self.profile`` is set, the kernel event is appended to
    ``self.events`` (GPU path only).

    :param sigma: width of the gaussian, the length of the function will be
                  8*sigma + 1
    :return: pyopencl array of float32 with the gaussian weights
    """
    queue = self.queue
    # Synchronise with whatever was enqueued before this call.
    pyopencl.enqueue_barrier(queue).wait()

    name = "gaussian_%s" % sigma
    size = kernel_size(sigma, True)
    wg_size = nextpower(size)
    logger.info("Allocating %s float for blur sigma: %s. wg=%s max_wg=%s",
                size, sigma, wg_size, self.block_size)

    kernel_wg = self.kernels_wg["gaussian"]
    if kernel_wg >= wg_size:
        buffer = pyopencl.array.empty(queue, size, dtype=numpy.float32)
        pyopencl.enqueue_barrier(queue).wait()
        gaussian_kernel = self.kernels.get_kernel("gaussian")
        local_bytes = 4 * wg_size
        local_1 = pyopencl.LocalMemory(local_bytes)
        local_2 = pyopencl.LocalMemory(local_bytes)
        kernel_args = (
            buffer.data,
            numpy.float32(sigma),  # const float sigma
            numpy.int32(size),  # const int SIZE
            local_1,  # some shared memory
            local_2,
        )
        # One work-group only: global and local sizes are both (wg_size,).
        evt = gaussian_kernel(queue, (wg_size,), (wg_size,), *kernel_args)
        pyopencl.enqueue_barrier(queue).wait()
        if self.profile:
            self.events.append(("gaussian %s" % sigma, evt))
    else:
        logger.info("Workgroup size error: gaussian wg: %s < max_work_group_size: %s",
                    kernel_wg, self.block_size)
        # common bug on OSX when running on CPU: compute on the host instead
        centred = numpy.arange(size) - (size - 1.0) / 2.0
        host_gaussian = numpy.exp(-(centred / sigma) ** 2 / 2.0).astype(numpy.float32)
        host_gaussian /= host_gaussian.sum(dtype=numpy.float32)
        buffer = pyopencl.array.to_device(queue, host_gaussian)

    self.cl_mem[name] = buffer
    return buffer