def __init__(self, lut, image_size, devicetype="all", platformid=None, deviceid=None, checksum=None):
    """
    Select an OpenCL device, build the context, queue, buffers and kernels,
    then upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: stored as self.size; rounded up to a multiple of BLOCK_SIZE for self.wdim_data
    @param devicetype: passed to ocl.select_device() when neither platformid nor deviceid is given
    @param platformid: index into ocl.platforms; falls back to 0 when only deviceid is given
    @param deviceid: index into the platform's devices; falls back to 0 when only platformid is given
    @param checksum: pre-calculated checksum of the lut, to avoid re-calculating it
    @raise MemoryError: when pyopencl raises its own MemoryError while the context, queue,
                        buffers or kernels are being set up
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    nbins, lut_size = lut.shape
    self.bins = nbins
    self.lut_size = lut_size
    self.size = image_size

    # Any falsy checksum (None, 0, ...) triggers a computation from the lut.
    checksum = checksum or crc32(self._lut)
    # Only the "lut" entry starts populated; every other entry starts as None.
    on_device = dict.fromkeys(("lut", "dark", "flat", "polarization", "solidangle"))
    on_device["lut"] = checksum
    self.on_device = on_device
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Device selection: automatic when nothing is given, otherwise the
    # missing half of the (platform, device) pair defaults to index 0.
    if platformid is None:
        if deviceid is None:
            platformid, deviceid = ocl.select_device(devicetype)
        else:
            platformid = 0
    elif deviceid is None:
        deviceid = 0

    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    on_apple_cpu = self.device_type == "CPU" and self.platform.vendor == "Apple"
    if on_apple_cpu:
        logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1

    block = self.BLOCK_SIZE
    self.workgroup_size = (block,)
    # Round both work dimensions up to the next multiple of the block size
    # (the bit mask relies on the block size being a power of two).
    self.wdim_bins = ((self.bins + block - 1) & ~(block - 1),)
    self.wdim_data = ((self.size + block - 1) & ~(block - 1),)

    try:
        cl_platform = pyopencl.get_platforms()[platformid]
        cl_device = cl_platform.get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        # Re-raise as the builtin exception type.
        raise MemoryError(error)

    # A "CPU" device receives the lut as given; any other device type
    # receives a transposed copy.
    host_lut = lut if self.device_type == "CPU" else lut.T.copy()
    pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], host_lut)
def __init__(self, size=16384, devicetype="CPU", profile=False, device=None, max_workgroup_size=None, roi=None, context=None):
    """Constructor of the class.

    :param size: size of the input keypoint-list allocated on the GPU (stored as self.kpsize).
    :param devicetype: can be CPU or GPU; only used when neither device nor context is given
    :param profile: set to true to activate profiling information collection
    :param device: 2-tuple of integers (platform index, device index), see clinfo
    :param max_workgroup_size: upper limit for the workgroup size, itself capped by the
        device's max_work_group_size. None (default) uses the device maximum.
        For CPU on MacOS, limit to 1.
    :param roi: Region Of Interest, handed to set_roi() when truthy
    :param context: Use an external context (discard devicetype and device options)
    """
    self.profile = bool(profile)
    self.events = []
    self.kpsize = size
    self.buffers = {}
    self.programs = {}
    self.memory = None
    self.octave_max = None
    self.red_size = None

    if context:
        # Recover the (platform id, device id) pair from the first device
        # of the externally supplied context.
        self.ctx = context
        ctx_device = self.ctx.devices[0]
        device_name = ctx_device.name.strip()
        platform_name = ctx_device.platform.name.strip()
        platform = ocl.get_platform(platform_name)
        device = platform.get_device(device_name)
        self.device = (platform.id, device.id)
    else:
        if device is None:
            device = ocl.select_device(type=devicetype, memory=self.memory, best=True)
        self.device = device
        platform_idx, device_idx = self.device[0], self.device[1]
        cl_device = pyopencl.get_platforms()[platform_idx].get_devices()[device_idx]
        self.ctx = pyopencl.Context(devices=[cl_device])

    # Profiling is requested through the queue properties only when asked for.
    queue_options = {}
    if profile:
        queue_options["properties"] = pyopencl.command_queue_properties.PROFILING_ENABLE
    self.queue = pyopencl.CommandQueue(self.ctx, **queue_options)

    # NOTE(review): compilation and allocation run before
    # self.max_workgroup_size and the per-instance self.kernels are set
    # below; verify that these helpers do not depend on them.
    self._compile_kernels()
    self._allocate_buffers()
    self.debug = []
    self._sem = threading.Semaphore()

    ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
    workgroup_limit = ocldevice.max_work_group_size
    if max_workgroup_size:
        workgroup_limit = min(int(max_workgroup_size), workgroup_limit)
    self.max_workgroup_size = workgroup_limit

    # Per-instance copy of the class-level kernel table, each size capped
    # by the limit computed above.
    self.kernels = {
        name: min(wg_size, self.max_workgroup_size)
        for name, wg_size in self.__class__.kernels.items()
    }

    self.devicetype = ocldevice.type
    self.USE_CPU = self.devicetype == "CPU"
    self.matching_kernel = "matching_cpu" if self.USE_CPU else "matching_gpu"

    self.roi = None
    if roi:
        self.set_roi(roi)
def __init__(self, size=16384, devicetype="CPU", profile=False, device=None, max_workgroup_size=None, roi=None, context=None):
    """Constructor of the class.

    :param size: size of the input keypoint-list allocated on the GPU (stored as self.kpsize).
    :param devicetype: can be CPU or GPU; only used when neither device nor context is given
    :param profile: set to true to activate profiling information collection
    :param device: 2-tuple of integers (platform index, device index), see clinfo
    :param max_workgroup_size: upper limit for the workgroup size, itself capped by the
        device's max_work_group_size. None (default) uses the device maximum.
        For CPU on MacOS, limit to 1.
    :param roi: Region Of Interest, handed to set_roi() when truthy
    :param context: Use an external context (discard devicetype and device options)
    """
    self.profile = bool(profile)
    self.events = []
    self.kpsize = size
    self.buffers = {}
    self.programs = {}
    self.memory = None
    self.octave_max = None
    self.red_size = None

    if context:
        # Recover the (platform id, device id) pair from the first device
        # of the externally supplied context.
        self.ctx = context
        ctx_device = self.ctx.devices[0]
        device_name = ctx_device.name.strip()
        platform_name = ctx_device.platform.name.strip()
        platform = ocl.get_platform(platform_name)
        device = platform.get_device(device_name)
        self.device = (platform.id, device.id)
    else:
        if device is None:
            device = ocl.select_device(type=devicetype, memory=self.memory, best=True)
        self.device = device
        platform_idx, device_idx = self.device[0], self.device[1]
        cl_device = pyopencl.get_platforms()[platform_idx].get_devices()[device_idx]
        self.ctx = pyopencl.Context(devices=[cl_device])

    # Profiling is requested through the queue properties only when asked for.
    queue_options = {}
    if profile:
        queue_options["properties"] = pyopencl.command_queue_properties.PROFILING_ENABLE
    self.queue = pyopencl.CommandQueue(self.ctx, **queue_options)

    # NOTE(review): compilation and allocation run before
    # self.max_workgroup_size and the per-instance self.kernels are set
    # below; verify that these helpers do not depend on them.
    self._compile_kernels()
    self._allocate_buffers()
    self.debug = []
    self._sem = threading.Semaphore()

    ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
    workgroup_limit = ocldevice.max_work_group_size
    if max_workgroup_size:
        workgroup_limit = min(int(max_workgroup_size), workgroup_limit)
    self.max_workgroup_size = workgroup_limit

    # Per-instance copy of the class-level kernel table, each size capped
    # by the limit computed above.
    self.kernels = {
        name: min(wg_size, self.max_workgroup_size)
        for name, wg_size in self.__class__.kernels.items()
    }

    self.devicetype = ocldevice.type
    self.USE_CPU = self.devicetype == "CPU"
    self.matching_kernel = "matching_cpu" if self.USE_CPU else "matching_gpu"

    self.roi = None
    if roi:
        self.set_roi(roi)
def __init__(self, lut, image_size, devicetype="all", platformid=None, deviceid=None, checksum=None):
    """
    Select an OpenCL device, build the context, queue, buffers and kernels,
    then upload the look-up table to the device.

    @param lut: array of int32 - float32 with shape (nbins, lut_size) with indexes and coefficients
    @param image_size: stored as self.size; rounded up to a multiple of BLOCK_SIZE for self.wdim_data
    @param devicetype: passed to ocl.select_device() when neither platformid nor deviceid is given
    @param platformid: index into ocl.platforms; falls back to 0 when only deviceid is given
    @param deviceid: index into the platform's devices; falls back to 0 when only platformid is given
    @param checksum: pre-calculated checksum of the lut, to avoid re-calculating it
    @raise MemoryError: when pyopencl raises its own MemoryError while the context, queue,
                        buffers or kernels are being set up
    """
    self.BLOCK_SIZE = 16
    self._sem = threading.Semaphore()
    self._lut = lut
    nbins, lut_size = lut.shape
    self.bins = nbins
    self.lut_size = lut_size
    self.size = image_size

    # Any falsy checksum (None, 0, ...) triggers a computation from the lut.
    checksum = checksum or crc32(self._lut)
    # Only the "lut" entry starts populated; every other entry starts as None.
    on_device = dict.fromkeys(("lut", "dark", "flat", "polarization", "solidangle"))
    on_device["lut"] = checksum
    self.on_device = on_device
    self._cl_kernel_args = {}
    self._cl_mem = {}

    # Device selection: automatic when nothing is given, otherwise the
    # missing half of the (platform, device) pair defaults to index 0.
    if platformid is None:
        if deviceid is None:
            platformid, deviceid = ocl.select_device(devicetype)
        else:
            platformid = 0
    elif deviceid is None:
        deviceid = 0

    self.platform = ocl.platforms[platformid]
    self.device = self.platform.devices[deviceid]
    self.device_type = self.device.type
    on_apple_cpu = self.device_type == "CPU" and self.platform.vendor == "Apple"
    if on_apple_cpu:
        logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
        self.BLOCK_SIZE = 1

    block = self.BLOCK_SIZE
    self.workgroup_size = (block,)
    # Round both work dimensions up to the next multiple of the block size
    # (the bit mask relies on the block size being a power of two).
    self.wdim_bins = ((self.bins + block - 1) & ~(block - 1),)
    self.wdim_data = ((self.size + block - 1) & ~(block - 1),)

    try:
        cl_platform = pyopencl.get_platforms()[platformid]
        cl_device = cl_platform.get_devices()[deviceid]
        self._ctx = pyopencl.Context(devices=[cl_device])
        self._queue = pyopencl.CommandQueue(self._ctx)
        self._allocate_buffers()
        self._compile_kernels()
        self._set_kernel_arguments()
    except pyopencl.MemoryError as error:
        # Re-raise as the builtin exception type.
        raise MemoryError(error)

    # A "CPU" device receives the lut as given; any other device type
    # receives a transposed copy.
    host_lut = lut if self.device_type == "CPU" else lut.T.copy()
    pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], host_lut)