def setUpClass(cls):
    """
    Prepare the OpenCL context, command queue and work-group limit
    shared by the test class.

    Nothing OpenCL-related is created when ``ocl`` is falsy.
    """
    super(_TestKeypoints, cls).setUpClass()
    if not ocl:
        return

    cls.ctx = ocl.create_context()

    # Profiling is enabled when the logger is at INFO level or more verbose.
    cls.PROFILE = logger.getEffectiveLevel() <= logging.INFO
    if cls.PROFILE:
        profiling = pyopencl.command_queue_properties.PROFILING_ENABLE
        cls.queue = pyopencl.CommandQueue(cls.ctx, properties=profiling)
    else:
        cls.queue = pyopencl.CommandQueue(cls.ctx)

    # Translate the first device of the context into (platform, device)
    # indices in order to look up its entry in ``ocl.platforms``.
    first_device = cls.ctx.devices[0]
    device_id = first_device.platform.get_devices().index(first_device)
    platform_id = pyopencl.get_platforms().index(first_device.platform)
    described = ocl.platforms[platform_id].devices[device_id]
    cls.maxwg = described.max_work_group_size
def __init__(self, image, devicetype="CPU", profile=False, device=None,
             max_workgroup_size=None, ROI=None, extra=0, context=None,
             init_sigma=None):
    """
    Constructor of the class

    :param image: reference image on which other image should be aligned
    :param devicetype: Kind of preferred device
    :param profile: collect profiling information ?
    :param device: 2-tuple of integer. see clinfo
    :param max_workgroup_size: limit the workgroup size
    :param ROI: Region of interest; when not None it is indexed with the
        rounded (y, x) positions of the reference keypoints and only the
        keypoints falling on non-zero entries are kept
    :param extra: extra space around the image, can be an integer, or a
        2 tuple in YX convention: TODO!
    :param context: existing OpenCL context to use; when given,
        devicetype and device are not used for the selection
    :param init_sigma: bluring width, you should have good reasons to
        modify the 1.6 default value...
    :raises RuntimeError: if the image is neither 2D nor 3D, or if no
        OpenCL device could be selected
    """
    self.profile = bool(profile)
    self.events = []
    self.program = None
    self.ref = numpy.ascontiguousarray(image, numpy.float32)
    self.buffers = {}
    self.shape = image.shape
    if len(self.shape) == 3:
        self.RGB = True
        self.shape = self.shape[:2]
    elif len(self.shape) == 2:
        self.RGB = False
    else:
        # The shape must be wrapped in a one-element tuple: passing it
        # directly as the right operand of % would spread its items over
        # the single %s and raise TypeError instead of this RuntimeError.
        raise RuntimeError("Unable to process image of shape %s"
                           % (tuple(self.shape),))
    if "__len__" not in dir(extra):
        self.extra = (int(extra), int(extra))
    else:
        self.extra = extra[:2]
    self.outshape = tuple(i + 2 * j for i, j in zip(self.shape, self.extra))
    self.ROI = ROI

    if context:
        # Map the external context back to (platform id, device id).
        self.ctx = context
        device_name = self.ctx.devices[0].name.strip()
        platform_name = self.ctx.devices[0].platform.name.strip()
        platform = ocl.get_platform(platform_name)
        device = platform.get_device(device_name)
        self.device = platform.id, device.id
    else:
        if device is None:
            self.device = ocl.select_device(type=devicetype, best=True)
            if self.device is None:
                raise RuntimeError(
                    "No suitable OpenCL device found with given constrains")
        else:
            self.device = device
        self.ctx = pyopencl.Context(devices=[
            pyopencl.get_platforms()[self.device[0]].get_devices()[
                self.device[1]]
        ])

    ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
    self.devicetype = ocldevice.type
    if max_workgroup_size:
        self.max_workgroup_size = min(int(max_workgroup_size),
                                      ocldevice.max_work_group_size)
    else:
        self.max_workgroup_size = ocldevice.max_work_group_size

    # Per-instance copy of the class-level kernel table, capped by the
    # work-group limit retained above.
    self.kernels = {}
    for k, v in self.__class__.kernels.items():
        self.kernels[k] = min(v, self.max_workgroup_size)

    # Only the target work-group shape depends on RGB vs greyscale; the
    # clamping against device and user limits is the same for both.
    target = (4, 8, 4) if self.RGB else (8, 4)
    self.wg = tuple(
        min(t, i, self.max_workgroup_size)
        for t, i in zip(target, self.ctx.devices[0].max_work_item_sizes))

    self.sift = SiftPlan(template=image, context=self.ctx,
                         profile=self.profile,
                         max_workgroup_size=self.max_workgroup_size,
                         init_sigma=init_sigma)
    self.ref_kp = self.sift.keypoints(image)
    if self.ROI is not None:
        kpx = numpy.round(self.ref_kp.x).astype(numpy.int32)
        kpy = numpy.round(self.ref_kp.y).astype(numpy.int32)
        masked = self.ROI[(kpy, kpx)].astype(bool)
        logger.warning(
            "Reducing keypoint list from %i to %i because of the ROI",
            self.ref_kp.size, masked.sum())
        self.ref_kp = self.ref_kp[masked]
    self.match = MatchPlan(context=self.ctx, profile=self.profile,
                           max_workgroup_size=self.max_workgroup_size)
    # Allocate reference keypoints on the GPU within match context:
    self.buffers["ref_kp_gpu"] = pyopencl.array.to_device(
        self.match.queue, self.ref_kp)
    # TODO optimize match so that the keypoint2 can be optional
    self.fill_value = 0
    if self.profile:
        self.queue = pyopencl.CommandQueue(
            self.ctx,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    else:
        self.queue = pyopencl.CommandQueue(self.ctx)
    self._compile_kernels()
    self._allocate_buffers()
    self.sem = Semaphore()
    self.relative_transfo = None
def __init__(self, size=16384, devicetype="ALL", profile=False, device=None,
             max_workgroup_size=None, roi=None, context=None):
    """Constructor of the class:

    :param size: size of the input keypoint-list allocated on the GPU.
    :param devicetype: can be CPU or GPU
    :param profile: set to true to activate profiling information
        collection
    :param device: 2-tuple of integer, see clinfo
    :param max_workgroup_size: CPU on MacOS, limit to 1. None by default
        to use default ones (max=128).
    :param roi: Region Of Interest: TODO
    :param context: Use an external context (discard devicetype and
        device options)
    :raises RuntimeError: if no device is given and none can be selected
    """
    self.profile = bool(profile)
    self.events = []
    self.kpsize = size
    self.buffers = {}
    self.programs = {}
    self.memory = None
    self.octave_max = None
    self.red_size = None

    if context:
        # Map the external context back to (platform id, device id).
        self.ctx = context
        device_name = self.ctx.devices[0].name.strip()
        platform_name = self.ctx.devices[0].platform.name.strip()
        platform = ocl.get_platform(platform_name)
        device = platform.get_device(device_name)
        self.device = platform.id, device.id
    else:
        if device is None:
            self.device = ocl.select_device(type=devicetype,
                                            memory=self.memory, best=True)
            if self.device is None:
                # Fail with an explicit message rather than an opaque
                # TypeError when self.device is indexed just below.
                raise RuntimeError(
                    "No suitable OpenCL device found with given constrains")
        else:
            self.device = device
        self.ctx = pyopencl.Context(devices=[
            pyopencl.get_platforms()[self.device[0]].get_devices()[
                self.device[1]]
        ])

    if profile:
        self.queue = pyopencl.CommandQueue(
            self.ctx,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    else:
        self.queue = pyopencl.CommandQueue(self.ctx)

    # NOTE(review): the two calls below run before the per-instance
    # kernels / max_workgroup_size / matching_kernel attributes are set;
    # the original statement order is kept on purpose - confirm that
    # these helpers do not rely on those attributes.
    # self._calc_workgroups()
    self._compile_kernels()
    self._allocate_buffers()
    self.debug = []
    self._sem = threading.Semaphore()

    ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
    if max_workgroup_size:
        self.max_workgroup_size = min(int(max_workgroup_size),
                                      ocldevice.max_work_group_size)
    else:
        self.max_workgroup_size = ocldevice.max_work_group_size

    # Per-instance copy of the class-level kernel table, capped by the
    # work-group limit retained above.
    self.kernels = {}
    for k, v in self.__class__.kernels.items():
        self.kernels[k] = min(v, self.max_workgroup_size)

    self.devicetype = ocldevice.type
    if self.devicetype == "CPU":
        self.USE_CPU = True
        self.matching_kernel = "matching_cpu"
    else:
        self.USE_CPU = False
        self.matching_kernel = "matching_gpu"

    self.roi = None
    # NOTE(review): this is a truthiness test; a multi-element numpy array
    # would raise ValueError here - confirm which type set_roi expects.
    if roi:
        self.set_roi(roi)
def __init__(self, shape=None, dtype=None, devicetype="ALL", template=None,
             profile=False, device=None, PIX_PER_KP=None,
             max_workgroup_size=None, context=None, init_sigma=None):
    """
    Constructor of the class

    :param shape: shape of the input image
    :param dtype: data type of the input image
    :param devicetype: can be 'CPU' or 'GPU'
    :param template: extract shape and dtype from an image
    :param profile: collect timing info
    :param device: 2-tuple of integers
    :param PIX_PER_KP: number of keypoint pre-allocated: 1 for 10 pixel
    :param max_workgroup_size: set to 1 under macosX on CPU; 4096 is used
        when not given
    :param context: provide an external context
    :param init_sigma: converted to float and stored; ``par.InitSigma``
        is used when None
    :raises RuntimeError: if the shape is neither 2D nor 3D, or if no
        OpenCL device could be selected
    """
    if init_sigma is None:
        init_sigma = par.InitSigma
    # no test on the values, just make sure it is a float
    self._init_sigma = float(init_sigma)
    self.buffers = {}
    self.programs = {}
    if template is not None:
        self.shape = template.shape
        self.dtype = template.dtype
    else:
        self.shape = shape
        self.dtype = numpy.dtype(dtype)
    if len(self.shape) == 3:
        self.RGB = True
        self.shape = self.shape[:2]
    elif len(self.shape) == 2:
        self.RGB = False
    else:
        # The shape must be wrapped in a one-element tuple: passing it
        # directly as the right operand of % would spread its items over
        # the single %s and raise TypeError instead of this RuntimeError.
        raise RuntimeError("Unable to process image of shape %s"
                           % (tuple(self.shape),))
    if PIX_PER_KP:
        self.PIX_PER_KP = int(PIX_PER_KP)
    self.profile = bool(profile)
    self.events = []
    self._sem = threading.Semaphore()
    self.scales = []  # in XY order
    self.procsize = []  # same as procsize but with dimension in (X,Y) not (slow, fast)
    self.wgsize = []
    self.kpsize = None
    self.memory = None
    self.octave_max = None
    self.red_size = None
    self._calc_scales()
    self.max_workgroup_size = max_workgroup_size or 4096
    self._calc_memory()
    self.LOW_END = 0

    if context:
        # Map the external context back to (platform id, device id).
        self.ctx = context
        device_name = self.ctx.devices[0].name.strip()
        platform_name = self.ctx.devices[0].platform.name.strip()
        platform = ocl.get_platform(platform_name)
        device = platform.get_device(device_name)
        self.device = platform.id, device.id
    else:
        if device is None:
            self.device = ocl.select_device(type=devicetype,
                                            memory=self.memory, best=True)
            if self.device is None:
                # Second attempt without the device-type constraint.
                self.device = ocl.select_device(memory=self.memory,
                                                best=True)
                if self.device:
                    logger.warning(
                        'Unable to find suitable device. Selecting device: %s, %s'
                        % self.device)
                if self.device is None:
                    raise RuntimeError(
                        "No suitable OpenCL device found with given constrains")
        else:
            self.device = device
        self.ctx = pyopencl.Context(devices=[
            pyopencl.get_platforms()[self.device[0]].get_devices()[
                self.device[1]]
        ])

    if profile:
        self.queue = pyopencl.CommandQueue(
            self.ctx,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    else:
        self.queue = pyopencl.CommandQueue(self.ctx)
    ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
    self._calc_workgroups()
    self._compile_kernels()
    self._allocate_buffers()
    self.debug = []
    self.cnt = numpy.empty(1, dtype=numpy.int32)
    self.devicetype = ocldevice.type
    if self.devicetype == "CPU":
        self.USE_CPU = True
    else:
        self.USE_CPU = False
    # NOTE(review): the flattened source does not show whether this check
    # was nested under the non-CPU branch; kept unconditional - confirm.
    if "HD Graphics" in ocldevice.name:
        self.LOW_END = 2