Example #1
    def __init__(self, lut, image_size, devicetype="all", platformid=None, deviceid=None, checksum=None):
        """
        @param lut: array of int32-float32 with shape (nbins, lut_size) with indexes and coefficients
        @param checksum: pre-calculated checksum to prevent re-calculating it :)
        """
        self.BLOCK_SIZE = 16
        self._sem = threading.Semaphore()
        self._lut = lut
        self.bins, self.lut_size = lut.shape
        self.size = image_size
        if not checksum:
            checksum = crc32(self._lut)
        self.on_device = {"lut": checksum, "dark": None, "flat": None, "polarization": None, "solidangle": None}
        self._cl_kernel_args = {}
        self._cl_mem = {}

        if (platformid is None) and (deviceid is None):
            platformid, deviceid = ocl.select_device(devicetype)
        elif platformid is None:
            platformid = 0
        elif deviceid is None:
            deviceid = 0
        self.platform = ocl.platforms[platformid]
        self.device = self.platform.devices[deviceid]
        self.device_type = self.device.type
        if (self.device_type == "CPU") and (self.platform.vendor == "Apple"):
            logger.warning("This is a workaround for Apple's OpenCL on CPU: enforce BLOCK_SIZE=1")
            self.BLOCK_SIZE = 1
        self.workgroup_size = (self.BLOCK_SIZE,)
        self.wdim_bins = ((self.bins + self.BLOCK_SIZE - 1) & ~(self.BLOCK_SIZE - 1),)
        self.wdim_data = ((self.size + self.BLOCK_SIZE - 1) & ~(self.BLOCK_SIZE - 1),)
        try:
            self._ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
            self._queue = pyopencl.CommandQueue(self._ctx)
            self._allocate_buffers()
            self._compile_kernels()
            self._set_kernel_arguments()
        except pyopencl.MemoryError as error:
            raise MemoryError(error)
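        # On CPU the LUT is uploaded in its original layout; on GPU a transposed
        # copy is uploaded instead (presumably so that neighbouring work-items
        # read adjacent coefficients).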
        if self.device_type == "CPU":
            pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], lut)
        else:
            pyopencl.enqueue_copy(self._queue, self._cl_mem["lut"], lut.T.copy())
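The global work sizes wdim_bins and wdim_data above are rounded up to a multiple of BLOCK_SIZE with a bit mask, which is only valid when BLOCK_SIZE is a power of two (16 here, or 1 on Apple's CPU driver). Below is a minimal standalone sketch of that rounding; round_up_to_block is a hypothetical helper name, and nothing beyond the standard library is assumed.

def round_up_to_block(n, block_size):
    """Round n up to the next multiple of block_size.

    Valid only when block_size is a power of two, as in the
    constructor above (BLOCK_SIZE is 16, or 1 on Apple's CPU OpenCL).
    """
    return (n + block_size - 1) & ~(block_size - 1)


if __name__ == "__main__":
    BLOCK_SIZE = 16
    for nbins in (1, 15, 16, 17, 1000):
        wdim = round_up_to_block(nbins, BLOCK_SIZE)
        # Same result as math.ceil(nbins / BLOCK_SIZE) * BLOCK_SIZE
        assert wdim % BLOCK_SIZE == 0 and wdim >= nbins
        print(nbins, "->", wdim)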
Example #2
    def __init__(self,
                 size=16384,
                 devicetype="CPU",
                 profile=False,
                 device=None,
                 max_workgroup_size=None,
                 roi=None,
                 context=None):
        """Constructor of the class:

        :param size: size of the input keypoint-list allocated on the GPU.
        :param devicetype: can be CPU or GPU
        :param profile: set to True to activate the collection of profiling information
        :param device: 2-tuple of integers, see clinfo
        :param max_workgroup_size: upper bound on the workgroup size (limit to 1 for CPU on MacOS); None by default to use the default values (max=128).
        :param roi: Region Of Interest: TODO
        :param context: Use an external context (discards the devicetype and device options)
        """
        self.profile = bool(profile)
        self.events = []
        self.kpsize = size
        self.buffers = {}
        self.programs = {}
        self.memory = None
        self.octave_max = None
        self.red_size = None
        if context:
            self.ctx = context
            device_name = self.ctx.devices[0].name.strip()
            platform_name = self.ctx.devices[0].platform.name.strip()
            platform = ocl.get_platform(platform_name)
            device = platform.get_device(device_name)
            self.device = platform.id, device.id
        else:
            if device is None:
                self.device = ocl.select_device(type=devicetype,
                                                memory=self.memory,
                                                best=True)
            else:
                self.device = device
            self.ctx = pyopencl.Context(devices=[
                pyopencl.get_platforms()[self.device[0]].get_devices()[
                    self.device[1]]
            ])
        if profile:
            self.queue = pyopencl.CommandQueue(
                self.ctx,
                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
        else:
            self.queue = pyopencl.CommandQueue(self.ctx)


#        self._calc_workgroups()
        self._compile_kernels()
        self._allocate_buffers()
        self.debug = []
        self._sem = threading.Semaphore()

        ocldevice = ocl.platforms[self.device[0]].devices[self.device[1]]

        if max_workgroup_size:
            self.max_workgroup_size = min(int(max_workgroup_size),
                                          ocldevice.max_work_group_size)
        else:
            self.max_workgroup_size = ocldevice.max_work_group_size
        self.kernels = {}
        for k, v in self.__class__.kernels.items():
            self.kernels[k] = min(v, self.max_workgroup_size)

        self.devicetype = ocldevice.type
        if self.devicetype == "CPU":
            self.USE_CPU = True
            self.matching_kernel = "matching_cpu"
        else:
            self.USE_CPU = False
            self.matching_kernel = "matching_gpu"
        self.roi = None
        if roi:
            self.set_roi(roi)
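The constructor's device handling boils down to picking a (platform, device) pair, building a pyopencl context on it, and creating a command queue with profiling enabled when requested. The snippet below is a minimal standalone sketch of that setup using only public pyopencl calls; make_queue is a hypothetical helper, and it assumes pyopencl is installed and at least one OpenCL platform is available at runtime.

import pyopencl


def make_queue(platformid=0, deviceid=0, profile=False):
    """Build a context on an explicit (platform, device) pair and a command
    queue, optionally with profiling enabled, as the constructor above does."""
    device = pyopencl.get_platforms()[platformid].get_devices()[deviceid]
    ctx = pyopencl.Context(devices=[device])
    if profile:
        queue = pyopencl.CommandQueue(
            ctx,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    else:
        queue = pyopencl.CommandQueue(ctx)
    return ctx, queue


if __name__ == "__main__":
    ctx, queue = make_queue(profile=True)
    dev = ctx.devices[0]
    # Cap the workgroup size at what the device supports, as the constructor does.
    max_wg = min(128, dev.max_work_group_size)
    print(dev.name, max_wg)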