def __init__(self, lut, image_size, devicetype="all", platformid=None, deviceid=None, checksum=None):
    """
    Select an OpenCL device, build the context, buffers and kernels, and
    upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: stored as self.size; used to derive the per-pixel work dimension
    @param devicetype: forwarded to ocl.select_device when neither platformid nor deviceid is given
    @param platformid: index into ocl.platforms; falls back to 0 when only deviceid is given
    @param deviceid: index into the platform's devices; falls back to 0 when only platformid is given
    @param checksum: pre-calculated checksum of the lut; computed with crc32 when falsy
    @raise MemoryError: when pyopencl raises its own MemoryError during context, buffer or kernel set-up
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    self.bins, self.lut_size = lut.shape
    self.size = image_size
    if not checksum:
        checksum = crc32(self._lut)

    # Checksums of what currently sits in each device buffer; None means
    # nothing has been uploaded for that correction yet.
    self.on_device = {"lut": checksum}
    for correction in ("dark", "flat", "polarization", "solidangle"):
        self.on_device[correction] = None
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Resolve the (platform, device) pair: automatic selection only when the
    # caller gave neither index, otherwise the missing index defaults to 0.
    if platformid is None:
        if deviceid is None:
            platformid, deviceid = ocl.select_device(devicetype)
        else:
            platformid = 0
    elif deviceid is None:
        deviceid = 0
    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    if (self.device_type == "CPU") and (self.platform.vendor == "Apple"):
        logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1

    # Work dimensions are 1-tuples, rounded up to a multiple of BLOCK_SIZE
    # (the mask trick is valid because BLOCK_SIZE is 16 or 1 here).
    block = self.BLOCK_SIZE
    align_mask = ~(block - 1)
    self.workgroup_size = (block,)
    self.wdim_bins = ((self.bins + block - 1) & align_mask,)
    self.wdim_data = ((self.size + block - 1) & align_mask,)

    try:
        cl_platform = pyopencl.get_platforms()[platformid]
        cl_device = cl_platform.get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        raise MemoryError(error)

    # CPU devices receive the table as given; every other device type
    # receives a contiguous copy of its transpose.
    if self.device_type == "CPU":
        host_lut = lut
    else:
        host_lut = lut.T.copy()
    pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], host_lut)
def match(self, nkp1, nkp2, raw_results=False):
    """Calculate the matching of 2 keypoint lists on the OpenCL device.

    :param nkp1, nkp2: numpy 1D recarray of keypoints or equivalent GPU buffer
    :param raw_results: if true return the 2D array of indexes of matching
        keypoints (not the actual keypoints)
    :return: with raw_results, an int32 array of shape (n, 2) of index pairs;
        otherwise a recarray of shape (n, 2) and dtype self.dtype_kp whose
        column 0 holds entries of nkp1 and column 1 entries of nkp2

    TODO: implement the ROI ...
    """
    assert len(nkp1.shape) == 1  # Nota: nkp1.ndim is not valid for gpu_arrays
    assert len(nkp2.shape) == 1
    # numpy.recarray is the public name of numpy.core.records.recarray.
    valid_types = (numpy.ndarray, numpy.recarray, pyopencl.array.Array)
    assert isinstance(nkp1, valid_types)
    assert isinstance(nkp2, valid_types)
    result = None
    with self._sem:
        # First keypoint list: use a device array as-is, otherwise upload the
        # host array into the (possibly enlarged) "Kp_1" buffer.
        if isinstance(nkp1, pyopencl.array.Array):
            kpt1_gpu = nkp1
        else:
            if nkp1.size > self.buffers["Kp_1"].size:
                logger.warning("increasing size of keypoint vector 1 to %i", nkp1.size)
                self.buffers["Kp_1"] = pyopencl.array.empty(self.queue, (nkp1.size,), dtype=self.dtype_kp)
            kpt1_gpu = self.buffers["Kp_1"]
            self._reset_buffer1()
            evt1 = pyopencl.enqueue_copy(self.queue, kpt1_gpu.data, nkp1)
            if self.profile:
                self.events.append(("copy H->D KP_1", evt1))

        # Second keypoint list: same handling with the "Kp_2" buffer.
        if isinstance(nkp2, pyopencl.array.Array):
            kpt2_gpu = nkp2
        else:
            if nkp2.size > self.buffers["Kp_2"].size:
                logger.warning("increasing size of keypoint vector 2 to %i", nkp2.size)
                self.buffers["Kp_2"] = pyopencl.array.empty(self.queue, (nkp2.size,), dtype=self.dtype_kp)
            kpt2_gpu = self.buffers["Kp_2"]
            self._reset_buffer2()
            evt2 = pyopencl.enqueue_copy(self.queue, kpt2_gpu.data, nkp2)
            if self.profile:
                self.events.append(("copy H->D KP_2", evt2))

        # Grow the output buffer when the shorter list no longer fits in it.
        smallest = min(kpt1_gpu.size, kpt2_gpu.size)
        if smallest > self.buffers["match"].shape[0]:
            self.kpsize = smallest
            self.buffers["match"] = pyopencl.array.empty(self.queue, (self.kpsize, 2), dtype=numpy.int32)
        # NOTE(review): reset on every call, not only after a resize; this
        # assumes _reset_output clears the "cnt" counter read back below.
        self._reset_output()

        wg = self.kernels[self.matching_kernel + ".matching"]
        global_size = calc_size((nkp1.size,), (wg,))
        evt = self.programs[self.matching_kernel].matching(
            self.queue, global_size, (wg,),
            kpt1_gpu.data,
            kpt2_gpu.data,
            self.buffers["match"].data,
            self.buffers["cnt"].data,
            numpy.int32(self.kpsize),
            numpy.float32(par.MatchRatio * par.MatchRatio),
            numpy.int32(nkp1.size),
            numpy.int32(nkp2.size))
        if self.profile:
            self.events.append(("matching", evt))

        # Number of matches is read back from the device counter.
        nb_match = self.buffers["cnt"].get()[0]
        match_idx = numpy.empty(shape=(nb_match, 2), dtype=numpy.int32)
        if nb_match > 0:
            cpyD2H = pyopencl.enqueue_copy(self.queue, match_idx, self.buffers["match"].data)
            # Record the event only when a copy was actually enqueued;
            # otherwise cpyD2H would be unbound.
            if self.profile:
                self.events.append(("copy D->H match", cpyD2H))

        if raw_results:
            result = match_idx
        else:
            # NOTE(review): indexing with a host index array presumably needs
            # nkp1/nkp2 to be host arrays here - confirm for GPU inputs.
            result = numpy.recarray(shape=(nb_match, 2), dtype=self.dtype_kp)
            result[:, 0] = nkp1[match_idx[:nb_match, 0]]
            result[:, 1] = nkp2[match_idx[:nb_match, 1]]
    return result
def match(self, nkp1, nkp2, raw_results=False):
    """Calculate the matching of 2 keypoint lists on the OpenCL device.

    :param nkp1, nkp2: numpy 1D recarray of keypoints or equivalent GPU buffer
    :param raw_results: if true return the 2D array of indexes of matching
        keypoints (not the actual keypoints)
    :return: with raw_results, an int32 array of shape (n, 2) of index pairs;
        otherwise a recarray of shape (n, 2) and dtype self.dtype_kp whose
        column 0 holds entries of nkp1 and column 1 entries of nkp2

    TODO: implement the ROI ...
    """
    assert len(nkp1.shape) == 1  # Nota: nkp1.ndim is not valid for gpu_arrays
    assert len(nkp2.shape) == 1
    # numpy.recarray is the public name of numpy.core.records.recarray.
    valid_types = (numpy.ndarray, numpy.recarray, pyopencl.array.Array)
    assert isinstance(nkp1, valid_types)
    assert isinstance(nkp2, valid_types)
    result = None
    with self._sem:
        # First keypoint list: use a device array as-is, otherwise upload the
        # host array into the (possibly enlarged) "Kp_1" buffer.
        if isinstance(nkp1, pyopencl.array.Array):
            kpt1_gpu = nkp1
        else:
            if nkp1.size > self.buffers["Kp_1"].size:
                logger.warning("increasing size of keypoint vector 1 to %i", nkp1.size)
                self.buffers["Kp_1"] = pyopencl.array.empty(self.queue, (nkp1.size,), dtype=self.dtype_kp)
            kpt1_gpu = self.buffers["Kp_1"]
            self._reset_buffer1()
            evt1 = pyopencl.enqueue_copy(self.queue, kpt1_gpu.data, nkp1)
            if self.profile:
                self.events.append(("copy H->D KP_1", evt1))

        # Second keypoint list: same handling with the "Kp_2" buffer.
        if isinstance(nkp2, pyopencl.array.Array):
            kpt2_gpu = nkp2
        else:
            if nkp2.size > self.buffers["Kp_2"].size:
                logger.warning("increasing size of keypoint vector 2 to %i", nkp2.size)
                self.buffers["Kp_2"] = pyopencl.array.empty(self.queue, (nkp2.size,), dtype=self.dtype_kp)
            kpt2_gpu = self.buffers["Kp_2"]
            self._reset_buffer2()
            evt2 = pyopencl.enqueue_copy(self.queue, kpt2_gpu.data, nkp2)
            if self.profile:
                self.events.append(("copy H->D KP_2", evt2))

        # Grow the output buffer when the shorter list no longer fits in it.
        smallest = min(kpt1_gpu.size, kpt2_gpu.size)
        if smallest > self.buffers["match"].shape[0]:
            self.kpsize = smallest
            self.buffers["match"] = pyopencl.array.empty(self.queue, (self.kpsize, 2), dtype=numpy.int32)
        # NOTE(review): reset on every call, not only after a resize; this
        # assumes _reset_output clears the "cnt" counter read back below.
        self._reset_output()

        wg = self.kernels[self.matching_kernel + ".matching"]
        global_size = calc_size((nkp1.size,), (wg,))
        evt = self.programs[self.matching_kernel].matching(
            self.queue, global_size, (wg,),
            kpt1_gpu.data,
            kpt2_gpu.data,
            self.buffers["match"].data,
            self.buffers["cnt"].data,
            numpy.int32(self.kpsize),
            numpy.float32(par.MatchRatio * par.MatchRatio),
            numpy.int32(nkp1.size),
            numpy.int32(nkp2.size))
        if self.profile:
            self.events.append(("matching", evt))

        # Number of matches is read back from the device counter.
        nb_match = self.buffers["cnt"].get()[0]
        match_idx = numpy.empty(shape=(nb_match, 2), dtype=numpy.int32)
        if nb_match > 0:
            cpyD2H = pyopencl.enqueue_copy(self.queue, match_idx, self.buffers["match"].data)
            # Record the event only when a copy was actually enqueued;
            # otherwise cpyD2H would be unbound.
            if self.profile:
                self.events.append(("copy D->H match", cpyD2H))

        if raw_results:
            result = match_idx
        else:
            # NOTE(review): indexing with a host index array presumably needs
            # nkp1/nkp2 to be host arrays here - confirm for GPU inputs.
            result = numpy.recarray(shape=(nb_match, 2), dtype=self.dtype_kp)
            result[:, 0] = nkp1[match_idx[:nb_match, 0]]
            result[:, 1] = nkp2[match_idx[:nb_match, 1]]
    return result
def __init__(self, lut, image_size, devicetype="all", platformid=None, deviceid=None, checksum=None):
    """
    Select an OpenCL device, build the context, buffers and kernels, and
    upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: stored as self.size; used to derive the per-pixel work dimension
    @param devicetype: forwarded to ocl.select_device when neither platformid nor deviceid is given
    @param platformid: index into ocl.platforms; falls back to 0 when only deviceid is given
    @param deviceid: index into the platform's devices; falls back to 0 when only platformid is given
    @param checksum: pre-calculated checksum of the lut; computed with crc32 when falsy
    @raise MemoryError: when pyopencl raises its own MemoryError during context, buffer or kernel set-up
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    self.bins, self.lut_size = lut.shape
    self.size = image_size
    if not checksum:
        checksum = crc32(self._lut)

    # Checksums of what currently sits in each device buffer; None means
    # nothing has been uploaded for that correction yet.
    self.on_device = {"lut": checksum}
    for correction in ("dark", "flat", "polarization", "solidangle"):
        self.on_device[correction] = None
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Resolve the (platform, device) pair: automatic selection only when the
    # caller gave neither index, otherwise the missing index defaults to 0.
    if platformid is None:
        if deviceid is None:
            platformid, deviceid = ocl.select_device(devicetype)
        else:
            platformid = 0
    elif deviceid is None:
        deviceid = 0
    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    if (self.device_type == "CPU") and (self.platform.vendor == "Apple"):
        logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1

    # Work dimensions are 1-tuples, rounded up to a multiple of BLOCK_SIZE
    # (the mask trick is valid because BLOCK_SIZE is 16 or 1 here).
    block = self.BLOCK_SIZE
    align_mask = ~(block - 1)
    self.workgroup_size = (block,)
    self.wdim_bins = ((self.bins + block - 1) & align_mask,)
    self.wdim_data = ((self.size + block - 1) & align_mask,)

    try:
        cl_platform = pyopencl.get_platforms()[platformid]
        cl_device = cl_platform.get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        raise MemoryError(error)

    # CPU devices receive the table as given; every other device type
    # receives a contiguous copy of its transpose.
    if self.device_type == "CPU":
        host_lut = lut
    else:
        host_lut = lut.T.copy()
    pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], host_lut)
def integrate(self, data, dummy=None, delta_dummy=None, dark=None, flat=None, solidAngle=None, polarization=None,
              dark_checksum=None, flat_checksum=None, solidAngle_checksum=None, polarization_checksum=None):
    """
    Integrate one image on the OpenCL device using the look-up table.

    The whole call runs while holding self._sem.

    @param data: image to integrate; uint16 and int32 frames are uploaded as they are and
                 followed by the u16_to_float / s32_to_float kernel, any other dtype is
                 converted to float32 on the host before the upload
    @param dummy: value handed to the kernels as "dummy"; None switches the dummy flag off
    @param delta_dummy: its absolute value is handed to the corrections kernel; None means 0
    @param dark: optional correction array, uploaded as float32; None disables its flag
    @param flat: optional correction array, uploaded as float32; None disables its flag
    @param solidAngle: optional correction array, uploaded as float32; None disables its flag
    @param polarization: optional correction array, uploaded as float32; None disables its flag
    @param dark_checksum: pre-calculated checksum of dark; computed with crc32 when falsy
    @param flat_checksum: pre-calculated checksum of flat; computed with crc32 when falsy
    @param solidAngle_checksum: pre-calculated checksum of solidAngle; computed with crc32 when falsy
    @param polarization_checksum: pre-calculated checksum of polarization; computed with crc32 when falsy
    @return: 3-tuple (outMerge, outData, outCount) of float32 arrays of length self.bins,
             read back from the device buffers of the same names
    """
    with self._sem:
        # Upload the frame; integer frames are followed by a cast kernel.
        if data.dtype == numpy.uint16:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image_u16"],
                                               numpy.ascontiguousarray(data))
            self._program.u16_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["u16_to_float"])
        elif data.dtype == numpy.int32:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data))
            self._program.s32_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["s32_to_float"])
        else:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data, dtype=numpy.float32))
        memset = self._program.memset_out(self._queue, self.wdim_bins, self.workgroup_size,
                                          *self._cl_kernel_args["memset_out"])

        # Dummy handling: flag, value and tolerance go into fixed argument slots.
        use_dummy = dummy is not None
        if use_dummy:
            do_dummy = numpy.int32(1)
            dummy = numpy.float32(dummy)
            if delta_dummy is None:
                delta_dummy = numpy.float32(0)
            else:
                delta_dummy = numpy.float32(abs(delta_dummy))
        else:
            do_dummy = numpy.int32(0)
            dummy = numpy.float32(0)
            delta_dummy = numpy.float32(0)
        corrections_args = self._cl_kernel_args["corrections"]
        integrate_args = self._cl_kernel_args["lut_integrate"]
        corrections_args[9] = do_dummy
        corrections_args[10] = dummy
        corrections_args[11] = delta_dummy
        integrate_args[2] = do_dummy
        integrate_args[3] = dummy

        # Per-correction handling: (buffer / cache key, flag slot in the
        # corrections kernel arguments, host array, caller-supplied checksum).
        apply_corrections = use_dummy
        for name, flag_slot, array, checksum in (
                ("dark", 1, dark, dark_checksum),
                ("flat", 3, flat, flat_checksum),
                ("solidangle", 5, solidAngle, solidAngle_checksum),
                ("polarization", 7, polarization, polarization_checksum)):
            if array is None:
                corrections_args[flag_slot] = numpy.int32(0)
                continue
            if not checksum:
                checksum = crc32(array)
            # Skip the upload when the same array is already on the device.
            if checksum != self.on_device[name]:
                pyopencl.enqueue_copy(self._queue, self._cl_mem[name],
                                      numpy.ascontiguousarray(array, dtype=numpy.float32))
                self.on_device[name] = checksum
            corrections_args[flag_slot] = numpy.int32(1)
            apply_corrections = True

        copy_image.wait()
        # The corrections kernel only runs when at least one flag is set.
        if apply_corrections:
            self._program.corrections(self._queue, self.wdim_data, self.workgroup_size,
                                      *corrections_args).wait()
        memset.wait()
        integrate_evt = self._program.lut_integrate(self._queue, self.wdim_bins, self.workgroup_size,
                                                    *integrate_args)
        outMerge = numpy.zeros(self.bins, dtype=numpy.float32)
        outData = numpy.zeros(self.bins, dtype=numpy.float32)
        outCount = numpy.zeros(self.bins, dtype=numpy.float32)
        integrate_evt.wait()
        pyopencl.enqueue_copy(self._queue, outMerge, self._cl_mem["outMerge"]).wait()
        pyopencl.enqueue_copy(self._queue, outData, self._cl_mem["outData"]).wait()
        pyopencl.enqueue_copy(self._queue, outCount, self._cl_mem["outCount"]).wait()
    return outMerge, outData, outCount
def integrate(self, data, dummy=None, delta_dummy=None, dark=None, flat=None, solidAngle=None, polarization=None,
              dark_checksum=None, flat_checksum=None, solidAngle_checksum=None, polarization_checksum=None):
    """
    Integrate one image on the OpenCL device using the look-up table.

    The whole call runs while holding self._sem.

    @param data: image to integrate; uint16 and int32 frames are uploaded as they are and
                 followed by the u16_to_float / s32_to_float kernel, any other dtype is
                 converted to float32 on the host before the upload
    @param dummy: value handed to the kernels as "dummy"; None switches the dummy flag off
    @param delta_dummy: its absolute value is handed to the corrections kernel; None means 0
    @param dark: optional correction array, uploaded as float32; None disables its flag
    @param flat: optional correction array, uploaded as float32; None disables its flag
    @param solidAngle: optional correction array, uploaded as float32; None disables its flag
    @param polarization: optional correction array, uploaded as float32; None disables its flag
    @param dark_checksum: pre-calculated checksum of dark; computed with crc32 when falsy
    @param flat_checksum: pre-calculated checksum of flat; computed with crc32 when falsy
    @param solidAngle_checksum: pre-calculated checksum of solidAngle; computed with crc32 when falsy
    @param polarization_checksum: pre-calculated checksum of polarization; computed with crc32 when falsy
    @return: 3-tuple (outMerge, outData, outCount) of float32 arrays of length self.bins,
             read back from the device buffers of the same names
    """
    with self._sem:
        # Upload the frame; integer frames are followed by a cast kernel.
        if data.dtype == numpy.uint16:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image_u16"],
                                               numpy.ascontiguousarray(data))
            self._program.u16_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["u16_to_float"])
        elif data.dtype == numpy.int32:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data))
            self._program.s32_to_float(self._queue, self.wdim_data, self.workgroup_size,
                                       *self._cl_kernel_args["s32_to_float"])
        else:
            copy_image = pyopencl.enqueue_copy(self._queue, self._cl_mem["image"],
                                               numpy.ascontiguousarray(data, dtype=numpy.float32))
        memset = self._program.memset_out(self._queue, self.wdim_bins, self.workgroup_size,
                                          *self._cl_kernel_args["memset_out"])

        # Dummy handling: flag, value and tolerance go into fixed argument slots.
        use_dummy = dummy is not None
        if use_dummy:
            do_dummy = numpy.int32(1)
            dummy = numpy.float32(dummy)
            if delta_dummy is None:
                delta_dummy = numpy.float32(0)
            else:
                delta_dummy = numpy.float32(abs(delta_dummy))
        else:
            do_dummy = numpy.int32(0)
            dummy = numpy.float32(0)
            delta_dummy = numpy.float32(0)
        corrections_args = self._cl_kernel_args["corrections"]
        integrate_args = self._cl_kernel_args["lut_integrate"]
        corrections_args[9] = do_dummy
        corrections_args[10] = dummy
        corrections_args[11] = delta_dummy
        integrate_args[2] = do_dummy
        integrate_args[3] = dummy

        # Per-correction handling: (buffer / cache key, flag slot in the
        # corrections kernel arguments, host array, caller-supplied checksum).
        apply_corrections = use_dummy
        for name, flag_slot, array, checksum in (
                ("dark", 1, dark, dark_checksum),
                ("flat", 3, flat, flat_checksum),
                ("solidangle", 5, solidAngle, solidAngle_checksum),
                ("polarization", 7, polarization, polarization_checksum)):
            if array is None:
                corrections_args[flag_slot] = numpy.int32(0)
                continue
            if not checksum:
                checksum = crc32(array)
            # Skip the upload when the same array is already on the device.
            if checksum != self.on_device[name]:
                pyopencl.enqueue_copy(self._queue, self._cl_mem[name],
                                      numpy.ascontiguousarray(array, dtype=numpy.float32))
                self.on_device[name] = checksum
            corrections_args[flag_slot] = numpy.int32(1)
            apply_corrections = True

        copy_image.wait()
        # The corrections kernel only runs when at least one flag is set.
        if apply_corrections:
            self._program.corrections(self._queue, self.wdim_data, self.workgroup_size,
                                      *corrections_args).wait()
        memset.wait()
        integrate_evt = self._program.lut_integrate(self._queue, self.wdim_bins, self.workgroup_size,
                                                    *integrate_args)
        outMerge = numpy.zeros(self.bins, dtype=numpy.float32)
        outData = numpy.zeros(self.bins, dtype=numpy.float32)
        outCount = numpy.zeros(self.bins, dtype=numpy.float32)
        integrate_evt.wait()
        pyopencl.enqueue_copy(self._queue, outMerge, self._cl_mem["outMerge"]).wait()
        pyopencl.enqueue_copy(self._queue, outData, self._cl_mem["outData"]).wait()
        pyopencl.enqueue_copy(self._queue, outCount, self._cl_mem["outCount"]).wait()
    return outMerge, outData, outCount