def __init__(self, lut, image_size, devicetype="all",
             platformid=None, deviceid=None, checksum=None):
    """
    Prepare an OpenCL context, queue, buffers and kernels for LUT based
    integration and upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: number of pixels of the images to process (stored as self.size)
    @param devicetype: device type handed to ocl.select_device() when neither platformid nor deviceid is given
    @param platformid: index of the OpenCL platform; 0 if only deviceid is given
    @param deviceid: index of the device within the platform; 0 if only platformid is given
    @param checksum: pre-calculated checksum of the LUT; crc32(lut) is computed when it is missing (or falsy)
    @raise MemoryError: when pyopencl reports a MemoryError while creating the
        context/queue, allocating the buffers, compiling the kernels or
        setting their arguments
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    self.bins, self.lut_size = lut.shape
    self.size = image_size

    # Checksums of what currently sits in each device buffer; lets later
    # calls skip re-uploading data that has not changed.
    lut_checksum = checksum or crc32(self._lut)
    self.on_device = {
        "lut": lut_checksum,
        "dark": None,
        "flat": None,
        "polarization": None,
        "solidangle": None,
    }
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Device selection: automatic when nothing was requested, otherwise the
    # missing index (at most one can be missing here) falls back to 0.
    if (platformid is None) and (deviceid is None):
        platformid, deviceid = ocl.select_device(devicetype)
    else:
        if platformid is None:
            platformid = 0
        if deviceid is None:
            deviceid = 0

    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    if (self.device_type == "CPU") and (self.platform.vendor == "Apple"):
        logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1

    # Global work sizes are rounded up to the next multiple of BLOCK_SIZE.
    # The bit mask trick is valid because BLOCK_SIZE is a power of two
    # (16 or 1). All three sizes are stored as 1-tuples.
    block = self.BLOCK_SIZE
    round_mask = ~(block - 1)
    self.workgroup_size = (block,)
    self.wdim_bins = ((self.bins + block - 1) & round_mask,)
    self.wdim_data = ((self.size + block - 1) & round_mask,)

    try:
        cl_device = pyopencl.get_platforms()[platformid].get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        # Re-raise as the builtin exception type.
        raise MemoryError(error)

    # A "CPU" device receives the LUT as given, any other device receives a
    # transposed copy.
    if self.device_type == "CPU":
        pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], lut)
    else:
        transposed_lut = lut.T.copy()
        pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], transposed_lut)
def __init__(self, lut, image_size, devicetype="all",
             platformid=None, deviceid=None, checksum=None):
    """
    Prepare an OpenCL context, queue, buffers and kernels for LUT based
    integration and upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: number of pixels of the images to process (stored as self.size)
    @param devicetype: device type handed to ocl.select_device() when neither platformid nor deviceid is given
    @param platformid: index of the OpenCL platform; 0 if only deviceid is given
    @param deviceid: index of the device within the platform; 0 if only platformid is given
    @param checksum: pre-calculated checksum of the LUT; crc32(lut) is computed when it is missing (or falsy)
    @raise MemoryError: when pyopencl reports a MemoryError while creating the
        context/queue, allocating the buffers, compiling the kernels or
        setting their arguments
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    self.bins, self.lut_size = lut.shape
    self.size = image_size

    # Checksums of what currently sits in each device buffer; lets later
    # calls skip re-uploading data that has not changed.
    lut_checksum = checksum or crc32(self._lut)
    self.on_device = {
        "lut": lut_checksum,
        "dark": None,
        "flat": None,
        "polarization": None,
        "solidangle": None,
    }
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Device selection: automatic when nothing was requested, otherwise the
    # missing index (at most one can be missing here) falls back to 0.
    if (platformid is None) and (deviceid is None):
        platformid, deviceid = ocl.select_device(devicetype)
    else:
        if platformid is None:
            platformid = 0
        if deviceid is None:
            deviceid = 0

    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    if (self.device_type == "CPU") and (self.platform.vendor == "Apple"):
        logger.warning(
            "This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1"
        )
        self.BLOCK_SIZE = 1

    # Global work sizes are rounded up to the next multiple of BLOCK_SIZE.
    # The bit mask trick is valid because BLOCK_SIZE is a power of two
    # (16 or 1). All three sizes are stored as 1-tuples.
    block = self.BLOCK_SIZE
    round_mask = ~(block - 1)
    self.workgroup_size = (block,)
    self.wdim_bins = ((self.bins + block - 1) & round_mask,)
    self.wdim_data = ((self.size + block - 1) & round_mask,)

    try:
        cl_device = pyopencl.get_platforms()[platformid].get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        # Re-raise as the builtin exception type.
        raise MemoryError(error)

    # A "CPU" device receives the LUT as given, any other device receives a
    # transposed copy.
    if self.device_type == "CPU":
        pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], lut)
    else:
        transposed_lut = lut.T.copy()
        pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], transposed_lut)
def integrate(self, data, dummy=None, delta_dummy=None, dark=None, flat=None,
              solidAngle=None, polarization=None,
              dark_checksum=None, flat_checksum=None,
              solidAngle_checksum=None, polarization_checksum=None):
    """
    Integrate one image on the OpenCL device using the look-up table.

    The image is uploaded (uint16 and int32 images are converted by a
    dedicated kernel on the device, anything else is converted to float32 on
    the host), the output buffers are reset, the "corrections" kernel is run
    when at least one correction is requested, then "lut_integrate" is run
    and the three output buffers are read back.

    Correction arrays are only uploaded when their checksum differs from the
    one recorded in self.on_device for the matching buffer.

    The whole call is serialised through self._sem.

    @param data: image to integrate, an array exposing ``dtype``
    @param dummy: dummy value forwarded to the kernels (presumably the marker of invalid pixels - confirm against the kernel source); None disables it
    @param delta_dummy: tolerance forwarded with dummy; its absolute value is used, None means 0. Ignored when dummy is None
    @param dark: dark array, or None to disable that correction
    @param flat: flat array, or None to disable that correction
    @param solidAngle: solid-angle array, or None to disable that correction
    @param polarization: polarization array, or None to disable that correction
    @param dark_checksum: pre-calculated checksum of dark; crc32(dark) is computed when missing (or falsy)
    @param flat_checksum: same, for flat
    @param solidAngle_checksum: same, for solidAngle
    @param polarization_checksum: same, for polarization
    @return: 3-tuple (outMerge, outData, outCount) of float32 arrays of
        length self.bins, read back from the device buffers of the same names
    """
    with self._sem:
        # Upload the image; the conversion kernels are enqueued right behind
        # the copy and their events are not needed afterwards.
        if data.dtype == numpy.uint16:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image_u16"],
                                               numpy.ascontiguousarray(data))
            self._program.u16_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["u16_to_float"])
        elif data.dtype == numpy.int32:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data))
            self._program.s32_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["s32_to_float"])
        else:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data, dtype=numpy.float32))
        memset = self._program.memset_out(self._queue, self.wdim_bins, self.workgroup_size,
                                          *self._cl_kernel_args["memset_out"])

        # Dummy handling. "is None" (not "== None") so that array-like or
        # numpy scalar values are never compared element-wise.
        if dummy is None:
            do_dummy = numpy.int32(0)
            dummy = numpy.float32(0)
            delta_dummy = numpy.float32(0)
        else:
            do_dummy = numpy.int32(1)
            dummy = numpy.float32(dummy)
            if delta_dummy is None:
                delta_dummy = numpy.float32(0)
            else:
                delta_dummy = numpy.float32(abs(delta_dummy))
        self._cl_kernel_args["corrections"][9] = do_dummy
        self._cl_kernel_args["corrections"][10] = dummy
        self._cl_kernel_args["corrections"][11] = delta_dummy
        self._cl_kernel_args["lut_integrate"][2] = do_dummy
        self._cl_kernel_args["lut_integrate"][3] = dummy

        # Optional corrections, processed in a fixed order. Each entry is
        # (device buffer / on_device key, host array, caller checksum,
        #  index of the enable flag in the "corrections" kernel arguments).
        optional_corrections = (
            ("dark", dark, dark_checksum, 1),
            ("flat", flat, flat_checksum, 3),
            ("solidangle", solidAngle, solidAngle_checksum, 5),
            ("polarization", polarization, polarization_checksum, 7),
        )
        needs_corrections = bool(do_dummy)
        for key, array, checksum, flag_index in optional_corrections:
            enabled = array is not None
            if enabled:
                needs_corrections = True
                if not checksum:
                    checksum = crc32(array)
                # Skip the upload when the device already holds this array.
                if checksum != self.on_device[key]:
                    pyopencl.enqueue_copy(self._queue, self._cl_mem[key],
                                          numpy.ascontiguousarray(array, dtype=numpy.float32))
                    self.on_device[key] = checksum
            self._cl_kernel_args["corrections"][flag_index] = numpy.int32(enabled)

        copy_image.wait()
        if needs_corrections:
            self._program.corrections(self._queue, self.wdim_data, self.workgroup_size,
                                      *self._cl_kernel_args["corrections"]).wait()
        memset.wait()
        integrate = self._program.lut_integrate(self._queue, self.wdim_bins, self.workgroup_size,
                                                *self._cl_kernel_args["lut_integrate"])
        # Host arrays are allocated while the integration kernel is in flight.
        outMerge = numpy.zeros(self.bins, dtype=numpy.float32)
        outData = numpy.zeros(self.bins, dtype=numpy.float32)
        outCount = numpy.zeros(self.bins, dtype=numpy.float32)
        integrate.wait()
        pyopencl.enqueue_copy(self._queue, outMerge, self._cl_mem["outMerge"]).wait()
        pyopencl.enqueue_copy(self._queue, outData, self._cl_mem["outData"]).wait()
        pyopencl.enqueue_copy(self._queue, outCount, self._cl_mem["outCount"]).wait()
    return outMerge, outData, outCount
def integrate(self, data, dummy=None, delta_dummy=None, dark=None, flat=None,
              solidAngle=None, polarization=None,
              dark_checksum=None, flat_checksum=None,
              solidAngle_checksum=None, polarization_checksum=None):
    """
    Integrate one image on the OpenCL device using the look-up table.

    The image is uploaded (uint16 and int32 images are converted by a
    dedicated kernel on the device, anything else is converted to float32 on
    the host), the output buffers are reset, the "corrections" kernel is run
    when at least one correction is requested, then "lut_integrate" is run
    and the three output buffers are read back.

    Correction arrays are only uploaded when their checksum differs from the
    one recorded in self.on_device for the matching buffer.

    The whole call is serialised through self._sem.

    @param data: image to integrate, an array exposing ``dtype``
    @param dummy: dummy value forwarded to the kernels (presumably the marker of invalid pixels - confirm against the kernel source); None disables it
    @param delta_dummy: tolerance forwarded with dummy; its absolute value is used, None means 0. Ignored when dummy is None
    @param dark: dark array, or None to disable that correction
    @param flat: flat array, or None to disable that correction
    @param solidAngle: solid-angle array, or None to disable that correction
    @param polarization: polarization array, or None to disable that correction
    @param dark_checksum: pre-calculated checksum of dark; crc32(dark) is computed when missing (or falsy)
    @param flat_checksum: same, for flat
    @param solidAngle_checksum: same, for solidAngle
    @param polarization_checksum: same, for polarization
    @return: 3-tuple (outMerge, outData, outCount) of float32 arrays of
        length self.bins, read back from the device buffers of the same names
    """
    with self._sem:
        # Upload the image; the conversion kernels are enqueued right behind
        # the copy and their events are not needed afterwards.
        if data.dtype == numpy.uint16:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image_u16"],
                                               numpy.ascontiguousarray(data))
            self._program.u16_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["u16_to_float"])
        elif data.dtype == numpy.int32:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data))
            self._program.s32_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["s32_to_float"])
        else:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data, dtype=numpy.float32))
        memset = self._program.memset_out(self._queue, self.wdim_bins, self.workgroup_size,
                                          *self._cl_kernel_args["memset_out"])

        # Dummy handling. "is None" (not "== None") so that array-like or
        # numpy scalar values are never compared element-wise.
        if dummy is None:
            do_dummy = numpy.int32(0)
            dummy = numpy.float32(0)
            delta_dummy = numpy.float32(0)
        else:
            do_dummy = numpy.int32(1)
            dummy = numpy.float32(dummy)
            if delta_dummy is None:
                delta_dummy = numpy.float32(0)
            else:
                delta_dummy = numpy.float32(abs(delta_dummy))
        self._cl_kernel_args["corrections"][9] = do_dummy
        self._cl_kernel_args["corrections"][10] = dummy
        self._cl_kernel_args["corrections"][11] = delta_dummy
        self._cl_kernel_args["lut_integrate"][2] = do_dummy
        self._cl_kernel_args["lut_integrate"][3] = dummy

        # Optional corrections, processed in a fixed order. Each entry is
        # (device buffer / on_device key, host array, caller checksum,
        #  index of the enable flag in the "corrections" kernel arguments).
        optional_corrections = (
            ("dark", dark, dark_checksum, 1),
            ("flat", flat, flat_checksum, 3),
            ("solidangle", solidAngle, solidAngle_checksum, 5),
            ("polarization", polarization, polarization_checksum, 7),
        )
        needs_corrections = bool(do_dummy)
        for key, array, checksum, flag_index in optional_corrections:
            enabled = array is not None
            if enabled:
                needs_corrections = True
                if not checksum:
                    checksum = crc32(array)
                # Skip the upload when the device already holds this array.
                if checksum != self.on_device[key]:
                    pyopencl.enqueue_copy(self._queue, self._cl_mem[key],
                                          numpy.ascontiguousarray(array, dtype=numpy.float32))
                    self.on_device[key] = checksum
            self._cl_kernel_args["corrections"][flag_index] = numpy.int32(enabled)

        copy_image.wait()
        if needs_corrections:
            self._program.corrections(self._queue, self.wdim_data, self.workgroup_size,
                                      *self._cl_kernel_args["corrections"]).wait()
        memset.wait()
        integrate = self._program.lut_integrate(self._queue, self.wdim_bins, self.workgroup_size,
                                                *self._cl_kernel_args["lut_integrate"])
        # Host arrays are allocated while the integration kernel is in flight.
        outMerge = numpy.zeros(self.bins, dtype=numpy.float32)
        outData = numpy.zeros(self.bins, dtype=numpy.float32)
        outCount = numpy.zeros(self.bins, dtype=numpy.float32)
        integrate.wait()
        pyopencl.enqueue_copy(self._queue, outMerge, self._cl_mem["outMerge"]).wait()
        pyopencl.enqueue_copy(self._queue, outData, self._cl_mem["outData"]).wait()
        pyopencl.enqueue_copy(self._queue, outCount, self._cl_mem["outCount"]).wait()
    return outMerge, outData, outCount