Ejemplo n.º 1
0
 def setUpClass(cls):
     """Create the OpenCL context, command queue and workgroup limit for the tests.

     After the parent class set-up, nothing more is done when ``ocl`` is
     falsy. Otherwise the class receives:

     * ``ctx``: the context returned by ``ocl.create_context()``
     * ``PROFILE`` / ``queue``: profiling is enabled on the queue when the
       effective log level is INFO or lower
     * ``maxwg``: the ``max_work_group_size`` reported by ``ocl`` for the
       first device of the context
     """
     super(_TestKeypoints, cls).setUpClass()
     if not ocl:
         return

     cls.ctx = ocl.create_context()
     if logger.getEffectiveLevel() > logging.INFO:
         cls.PROFILE = False
         cls.queue = pyopencl.CommandQueue(cls.ctx)
     else:
         cls.PROFILE = True
         profiling_flag = pyopencl.command_queue_properties.PROFILING_ENABLE
         cls.queue = pyopencl.CommandQueue(cls.ctx, properties=profiling_flag)

     # Translate the pyopencl device into (platform, device) indices so the
     # matching entry can be looked up in ocl.platforms.
     selected = cls.ctx.devices[0]
     device_id = selected.platform.get_devices().index(selected)
     platform_id = pyopencl.get_platforms().index(selected.platform)
     ocl_entry = ocl.platforms[platform_id].devices[device_id]
     cls.maxwg = ocl_entry.max_work_group_size
Ejemplo n.º 2
0
    def __init__(self,
                 image,
                 devicetype="CPU",
                 profile=False,
                 device=None,
                 max_workgroup_size=None,
                 ROI=None,
                 extra=0,
                 context=None,
                 init_sigma=None):
        """
        Constructor of the class

        Selects (or adopts) an OpenCL context, extracts the keypoints of the
        reference image with a SiftPlan, sends them to the device through the
        MatchPlan queue, then creates the command queue and calls
        ``_compile_kernels`` and ``_allocate_buffers``.

        :param image: reference image on which other images should be aligned
        :param devicetype: kind of preferred device, only used when neither
            `context` nor `device` is given
        :param profile: collect profiling information?
        :param device: 2-tuple of integers (platform index, device index),
            see clinfo
        :param max_workgroup_size: limit the workgroup size; the value is
            capped by the device's own ``max_work_group_size``
        :param ROI: region of interest; when not None it is indexed with the
            rounded (y, x) keypoint coordinates and only the reference
            keypoints with a truthy value are kept
        :param extra: extra space around the image, can be an integer, or a
            2-tuple in YX convention: TODO!
        :param context: externally provided OpenCL context; when truthy it
            takes precedence over `devicetype` and `device`
        :param init_sigma: blurring width forwarded to SiftPlan; you should
            have good reasons to modify the 1.6 default value...
        :raises RuntimeError: if the image is neither 2D nor 3D, or if no
            suitable OpenCL device can be selected
        """
        self.profile = bool(profile)
        self.events = []
        self.program = None
        self.ref = numpy.ascontiguousarray(image, numpy.float32)
        self.buffers = {}
        self.shape = image.shape
        if len(self.shape) == 3:
            self.RGB = True
            self.shape = self.shape[:2]
        elif len(self.shape) == 2:
            self.RGB = False
        else:
            # The shape must be wrapped in a 1-tuple: formatting directly with
            # the shape tuple raises TypeError instead of this RuntimeError.
            raise RuntimeError("Unable to process image of shape %s" %
                               (tuple(self.shape),))

        if hasattr(extra, "__len__"):
            self.extra = extra[:2]
        else:
            self.extra = (int(extra), int(extra))
        # Output shape: the image padded by `extra` on both sides of each axis
        self.outshape = tuple(i + 2 * j
                              for i, j in zip(self.shape, self.extra))
        self.ROI = ROI

        if context:
            # Adopt the external context and recover the matching
            # (platform id, device id) pair from the names of its first device
            self.ctx = context
            device_name = self.ctx.devices[0].name.strip()
            platform_name = self.ctx.devices[0].platform.name.strip()
            platform = ocl.get_platform(platform_name)
            device = platform.get_device(device_name)
            self.device = platform.id, device.id
        else:
            if device is None:
                self.device = ocl.select_device(type=devicetype, best=True)
                if self.device is None:
                    raise RuntimeError(
                        "No suitable OpenCL device found with given constrains"
                    )
            else:
                self.device = device
            self.ctx = pyopencl.Context(devices=[
                pyopencl.get_platforms()[self.device[0]].get_devices()[
                    self.device[1]]
            ])
        ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
        self.devicetype = ocldevice.type

        if max_workgroup_size:
            self.max_workgroup_size = min(int(max_workgroup_size),
                                          ocldevice.max_work_group_size)
        else:
            self.max_workgroup_size = ocldevice.max_work_group_size
        # Per-instance copy of the class-level kernel table, with every
        # workgroup size clipped to what this device/limit allows
        self.kernels = {}
        for k, v in self.__class__.kernels.items():
            self.kernels[k] = min(v, self.max_workgroup_size)

        # Target workgroup shape (3 entries for RGB images, 2 otherwise),
        # clipped by the device's per-dimension limits and the global limit
        target = (4, 8, 4) if self.RGB else (8, 4)
        max_item_sizes = self.ctx.devices[0].max_work_item_sizes
        self.wg = tuple(min(t, i, self.max_workgroup_size)
                        for t, i in zip(target, max_item_sizes))

        self.sift = SiftPlan(template=image,
                             context=self.ctx,
                             profile=self.profile,
                             max_workgroup_size=self.max_workgroup_size,
                             init_sigma=init_sigma)
        self.ref_kp = self.sift.keypoints(image)
        if self.ROI is not None:
            # Keep only the reference keypoints whose rounded position has a
            # truthy value in the ROI
            kpx = numpy.round(self.ref_kp.x).astype(numpy.int32)
            kpy = numpy.round(self.ref_kp.y).astype(numpy.int32)
            masked = self.ROI[(kpy, kpx)].astype(bool)
            logger.warning(
                "Reducing keypoint list from %i to %i because of the ROI",
                self.ref_kp.size, masked.sum())
            self.ref_kp = self.ref_kp[masked]
        self.match = MatchPlan(context=self.ctx,
                               profile=self.profile,
                               max_workgroup_size=self.max_workgroup_size)
        # Allocate reference keypoints on the GPU within match context:
        self.buffers["ref_kp_gpu"] = pyopencl.array.to_device(
            self.match.queue, self.ref_kp)
        # TODO optimize match so that the keypoint2 can be optional
        self.fill_value = 0
        if self.profile:
            self.queue = pyopencl.CommandQueue(
                self.ctx,
                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
        else:
            self.queue = pyopencl.CommandQueue(self.ctx)
        self._compile_kernels()
        self._allocate_buffers()
        self.sem = Semaphore()
        self.relative_transfo = None
Ejemplo n.º 3
0
    def __init__(self,
                 size=16384,
                 devicetype="ALL",
                 profile=False,
                 device=None,
                 max_workgroup_size=None,
                 roi=None,
                 context=None):
        """Constructor of the class:

        :param size: size of the input keypoint-list allocated on the GPU.
        :param devicetype: can be CPU or GPU (default "ALL"); only used when
            neither `context` nor `device` is given
        :param profile: set to true to activate profiling information collection
        :param device: 2-tuple of integers (platform index, device index), see clinfo
        :param max_workgroup_size: limit for the workgroup size, capped by the
            device's own ``max_work_group_size``; None (default) uses the
            device maximum. The original note suggests limiting it to 1 for
            CPU on MacOS.
        :param roi: Region Of Interest (TODO); when truthy it is passed to
            ``set_roi``
        :param context: Use an external context (discard devicetype and device options)
        :raises RuntimeError: if no suitable OpenCL device can be selected
        """
        self.profile = bool(profile)
        self.events = []
        self.kpsize = size
        self.buffers = {}
        self.programs = {}
        self.memory = None
        self.octave_max = None
        self.red_size = None
        if context:
            # Adopt the external context and recover the matching
            # (platform id, device id) pair from the names of its first device
            self.ctx = context
            device_name = self.ctx.devices[0].name.strip()
            platform_name = self.ctx.devices[0].platform.name.strip()
            platform = ocl.get_platform(platform_name)
            device = platform.get_device(device_name)
            self.device = platform.id, device.id
        else:
            if device is None:
                self.device = ocl.select_device(type=devicetype,
                                                memory=self.memory,
                                                best=True)
                # Fail with an explicit error rather than an opaque TypeError
                # when indexing a None device below.
                if self.device is None:
                    raise RuntimeError(
                        "No suitable OpenCL device found with given constrains"
                    )
            else:
                self.device = device
            self.ctx = pyopencl.Context(devices=[
                pyopencl.get_platforms()[self.device[0]].get_devices()[
                    self.device[1]]
            ])
        if self.profile:
            self.queue = pyopencl.CommandQueue(
                self.ctx,
                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
        else:
            self.queue = pyopencl.CommandQueue(self.ctx)
        # NOTE(review): kernels are compiled and buffers allocated before the
        # per-instance `kernels`, `max_workgroup_size` and `devicetype` are
        # set below; order kept as-is -- confirm those helpers do not rely on
        # the per-instance values.
        # self._calc_workgroups()
        self._compile_kernels()
        self._allocate_buffers()
        self.debug = []
        self._sem = threading.Semaphore()

        ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]

        if max_workgroup_size:
            self.max_workgroup_size = min(int(max_workgroup_size),
                                          ocldevice.max_work_group_size)
        else:
            self.max_workgroup_size = ocldevice.max_work_group_size
        # Per-instance copy of the class-level kernel table, with every
        # workgroup size clipped to what this device/limit allows
        self.kernels = {}
        for k, v in self.__class__.kernels.items():
            self.kernels[k] = min(v, self.max_workgroup_size)

        # Choose the matching kernel name according to the device type
        self.devicetype = ocldevice.type
        if self.devicetype == "CPU":
            self.USE_CPU = True
            self.matching_kernel = "matching_cpu"
        else:
            self.USE_CPU = False
            self.matching_kernel = "matching_gpu"
        self.roi = None
        # NOTE(review): truthiness test kept from the original; if roi is
        # meant to be a multi-element numpy array this test would raise --
        # confirm the expected type against set_roi.
        if roi:
            self.set_roi(roi)
Ejemplo n.º 4
0
    def __init__(self,
                 shape=None,
                 dtype=None,
                 devicetype="ALL",
                 template=None,
                 profile=False,
                 device=None,
                 PIX_PER_KP=None,
                 max_workgroup_size=None,
                 context=None,
                 init_sigma=None):
        """
        Constructor of the class

        :param shape: shape of the input image
        :param dtype: data type of the input image
        :param devicetype: can be 'CPU' or 'GPU' (default "ALL"); only used
            when neither `context` nor `device` is given
        :param template: extract shape and dtype from an image; takes
            precedence over `shape` and `dtype`
        :param profile: collect timing info
        :param device: 2-tuple of integers (platform index, device index)
        :param PIX_PER_KP: number of keypoint pre-allocated: 1 for 10 pixel
        :param max_workgroup_size: set to 1 under macosX on CPU; 4096 is
            used when not given
        :param context: provide an external context
        :param init_sigma: blurring width, stored as a float; defaults to
            ``par.InitSigma`` when None
        :raises RuntimeError: if the image shape is neither 2D nor 3D, or if
            no suitable OpenCL device can be selected
        """
        if init_sigma is None:
            init_sigma = par.InitSigma
        # no test on the values, just make sure it is a float
        self._init_sigma = float(init_sigma)
        self.buffers = {}
        self.programs = {}
        if template is not None:
            self.shape = template.shape
            self.dtype = template.dtype
        else:
            self.shape = shape
            self.dtype = numpy.dtype(dtype)
        if len(self.shape) == 3:
            self.RGB = True
            self.shape = self.shape[:2]
        elif len(self.shape) == 2:
            self.RGB = False
        else:
            # The shape must be wrapped in a 1-tuple: formatting directly with
            # the shape tuple raises TypeError instead of this RuntimeError.
            raise RuntimeError("Unable to process image of shape %s" %
                               (tuple(self.shape),))
        if PIX_PER_KP:
            self.PIX_PER_KP = int(PIX_PER_KP)
        self.profile = bool(profile)
        self.events = []
        self._sem = threading.Semaphore()
        self.scales = []  # in XY order
        self.procsize = []  # processing sizes in (X, Y), not (slow, fast)
        self.wgsize = []
        self.kpsize = None
        self.memory = None
        self.octave_max = None
        self.red_size = None
        self._calc_scales()
        self.max_workgroup_size = max_workgroup_size or 4096
        self._calc_memory()
        self.LOW_END = 0
        if context:
            # Adopt the external context and recover the matching
            # (platform id, device id) pair from the names of its first device
            self.ctx = context
            device_name = self.ctx.devices[0].name.strip()
            platform_name = self.ctx.devices[0].platform.name.strip()
            platform = ocl.get_platform(platform_name)
            device = platform.get_device(device_name)
            self.device = platform.id, device.id
        else:
            if device is None:
                self.device = ocl.select_device(type=devicetype,
                                                memory=self.memory,
                                                best=True)
                if self.device is None:
                    # Retry without the device-type constraint before
                    # giving up
                    self.device = ocl.select_device(memory=self.memory,
                                                    best=True)
                    if self.device:
                        logger.warning(
                            'Unable to find suitable device. Selecting device: %s, %s',
                            *self.device)
                if self.device is None:
                    raise RuntimeError(
                        "No suitable OpenCL device found with given constrains"
                    )
            else:
                self.device = device
            self.ctx = pyopencl.Context(devices=[
                pyopencl.get_platforms()[self.device[0]].get_devices()[
                    self.device[1]]
            ])

        if self.profile:
            self.queue = pyopencl.CommandQueue(
                self.ctx,
                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
        else:
            self.queue = pyopencl.CommandQueue(self.ctx)
        ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]
        self._calc_workgroups()
        self._compile_kernels()
        self._allocate_buffers()
        self.debug = []
        self.cnt = numpy.empty(1, dtype=numpy.int32)
        self.devicetype = ocldevice.type
        if self.devicetype == "CPU":
            self.USE_CPU = True
        else:
            self.USE_CPU = False
            # Non-CPU devices whose name contains "HD Graphics" get
            # LOW_END = 2 instead of the default 0
            if "HD Graphics" in ocldevice.name:
                self.LOW_END = 2