def init_cuda(self):
    """Select a CUDA device, create its context and load the CSC kernel.

    Queries and caches the hardware launch limits (max block/grid
    dimensions, max threads per block) and some device information for
    later use, then pops the context so it is not left current on the
    calling thread.  The context is popped even if kernel compilation or
    any of the queries fail.
    """
    self.device_id, self.cuda_device = select_device()
    log("init_cuda() device_id=%s, device info: %s", self.device_id, device_info(self.cuda_device))
    #use alias to make code easier to read:
    d = self.cuda_device
    da = driver.device_attribute
    fa = driver.function_attribute
    cf = driver.ctx_flags
    #MAP_HOST allows mapped page-locked host memory, SCHED_AUTO lets the driver choose:
    self.cuda_context = self.cuda_device.make_context(flags=cf.SCHED_AUTO | cf.MAP_HOST)
    try:
        #current_thread() replaces the deprecated currentThread() alias:
        log("init_cuda() cuda_device=%s, cuda_context=%s, thread=%s", self.cuda_device, self.cuda_context, threading.current_thread())
        #compile/get kernel:
        self.kernel_function_name, self.kernel_function = get_CUDA_csc_function(self.device_id, self.src_format, self.dst_format)
        #cache the launch-dimension limits needed to size kernel invocations:
        self.max_block_sizes = d.get_attribute(da.MAX_BLOCK_DIM_X), d.get_attribute(da.MAX_BLOCK_DIM_Y), d.get_attribute(da.MAX_BLOCK_DIM_Z)
        self.max_grid_sizes = d.get_attribute(da.MAX_GRID_DIM_X), d.get_attribute(da.MAX_GRID_DIM_Y), d.get_attribute(da.MAX_GRID_DIM_Z)
        log("max_block_sizes=%s", self.max_block_sizes)
        log("max_grid_sizes=%s", self.max_grid_sizes)
        self.max_threads_per_block = self.kernel_function.get_attribute(fa.MAX_THREADS_PER_BLOCK)
        log("max_threads_per_block=%s", self.max_threads_per_block)
        #query info with device context active, and cache it for later:
        self.pycuda_info = get_pycuda_info()
        self.cuda_device_info = {
            "context.api_version"   : self.cuda_context.get_api_version(),
            "device.name"           : d.name(),
            "device.pci_bus_id"     : d.pci_bus_id(),
            }
    finally:
        #always pop, so the context is not left current on this thread:
        self.cuda_context.pop()
    self.convert_image_fn = self.convert_image_rgb
    #NOTE(review): this logs self.convert_image (not convert_image_fn) -
    #assumed to be a dispatcher method defined elsewhere in this class; verify.
    log("init_context(..) convert_image=%s", self.convert_image)
def init_cuda(self):
    """Initialize the CUDA device, context and colorspace-conversion kernel.

    Picks a device via select_device(), makes a context on it, compiles or
    fetches the CSC kernel for the configured src/dst formats, and caches
    the device's launch limits and identity information.  The context is
    popped in a finally clause so it never stays current on this thread.
    """
    self.device_id, self.cuda_device = select_device()
    log("init_cuda() device_id=%s, device info: %s", self.device_id, device_info(self.cuda_device))
    #use alias to make code easier to read:
    d = self.cuda_device
    da = driver.device_attribute
    fa = driver.function_attribute
    cf = driver.ctx_flags
    #SCHED_AUTO: driver picks the scheduling policy; MAP_HOST: enable mapped host memory:
    self.cuda_context = self.cuda_device.make_context(flags=cf.SCHED_AUTO | cf.MAP_HOST)
    try:
        #use current_thread(): currentThread() is a deprecated alias (warns on 3.10+):
        log("init_cuda() cuda_device=%s, cuda_context=%s, thread=%s", self.cuda_device, self.cuda_context, threading.current_thread())
        #compile/get kernel:
        self.kernel_function_name, self.kernel_function = get_CUDA_csc_function(
            self.device_id, self.src_format, self.dst_format)
        #hardware limits used when sizing kernel launches:
        self.max_block_sizes = d.get_attribute(
            da.MAX_BLOCK_DIM_X), d.get_attribute(
            da.MAX_BLOCK_DIM_Y), d.get_attribute(da.MAX_BLOCK_DIM_Z)
        self.max_grid_sizes = d.get_attribute(
            da.MAX_GRID_DIM_X), d.get_attribute(
            da.MAX_GRID_DIM_Y), d.get_attribute(da.MAX_GRID_DIM_Z)
        log("max_block_sizes=%s", self.max_block_sizes)
        log("max_grid_sizes=%s", self.max_grid_sizes)
        self.max_threads_per_block = self.kernel_function.get_attribute(
            fa.MAX_THREADS_PER_BLOCK)
        log("max_threads_per_block=%s", self.max_threads_per_block)
        #query info with device context active, and cache it for later:
        self.pycuda_info = get_pycuda_info()
        self.cuda_device_info = {
            "context.api_version": self.cuda_context.get_api_version(),
            "device.name": d.name(),
            "device.pci_bus_id": d.pci_bus_id(),
            }
    finally:
        #pop unconditionally so a failure above cannot leak a current context:
        self.cuda_context.pop()
    self.convert_image_fn = self.convert_image_rgb
    #NOTE(review): logs self.convert_image rather than the convert_image_fn
    #just assigned - assumed to be a method defined elsewhere in this class; verify.
    log("init_context(..) convert_image=%s", self.convert_image)
def get_info():
    """Module-level accessor: return the PyCUDA runtime/driver information."""
    pycuda_info = get_pycuda_info()
    return pycuda_info