Beispiel #1
0
    def __init__(self, device):
        """Record the launch limits and memory parameters of ``device``.

        Stores a fresh id from ``get_id()``, keeps a reference to the device,
        and copies its per-block and per-grid limits, warp size, shared-memory
        size and multiprocessor count onto this object.

        :param device: device object exposing ``max_threads_per_block``,
            ``max_block_dim_{x,y,z}``, ``max_grid_dim_{x,y,z}``,
            ``warp_size``, ``max_shared_memory_per_block``,
            ``multiprocessor_count`` and ``compute_capability()``.
        """
        self.api_id = get_id()
        self._device = device

        # Per-group (block) limits: total size, then per-dimension sizes.
        self.max_work_group_size = device.max_threads_per_block
        self.max_work_item_sizes = [
            device.max_block_dim_x,
            device.max_block_dim_y,
            device.max_block_dim_z,
        ]

        # Per-dimension grid limits.
        self.max_num_groups = [
            device.max_grid_dim_x,
            device.max_grid_dim_y,
            device.max_grid_dim_z,
        ]

        # The API exposes no constant for the bank count at the moment, so it
        # is chosen from the first component of the compute capability.
        capability_head = device.compute_capability()[0]
        if capability_head < 2:
            self.local_mem_banks = 16
        else:
            self.local_mem_banks = 32

        self.warp_size = device.warp_size

        # Map each word size (4, 8, 16) to the value DeviceData.align_words()
        # reports for it.
        device_data = DeviceData(device)
        self.min_mem_coalesce_width = {
            word_size: device_data.align_words(word_size=word_size)
            for word_size in (4, 8, 16)
        }
        self.local_mem_size = device.max_shared_memory_per_block

        self.compute_units = device.multiprocessor_count
Beispiel #2
0
    def __init__(self, device):
        """Capture the limits and memory characteristics of ``device``.

        Assigns an id obtained from ``get_id()``, remembers the device, and
        mirrors its block/grid limits, warp size, shared-memory size and
        multiprocessor count as attributes of this object.

        :param device: device object providing the ``max_*`` limit attributes,
            ``warp_size``, ``multiprocessor_count`` and
            ``compute_capability()`` read below.
        """
        self.api_id = get_id()

        self._device = device
        self.max_work_group_size = device.max_threads_per_block

        # x, y, z block dimensions, in that order.
        block_dims = (
            device.max_block_dim_x,
            device.max_block_dim_y,
            device.max_block_dim_z,
        )
        self.max_work_item_sizes = list(block_dims)

        # x, y, z grid dimensions, in that order.
        grid_dims = (
            device.max_grid_dim_x,
            device.max_grid_dim_y,
            device.max_grid_dim_z,
        )
        self.max_num_groups = list(grid_dims)

        # there is no corresponding constant in the API at the moment
        first_cc_component = device.compute_capability()[0]
        self.local_mem_banks = 16 if first_cc_component < 2 else 32

        self.warp_size = device.warp_size

        # Build the word-size -> align_words() table before publishing it.
        devdata = DeviceData(device)
        coalesce_widths = {}
        for word_size in (4, 8, 16):
            coalesce_widths[word_size] = devdata.align_words(
                word_size=word_size)
        self.min_mem_coalesce_width = coalesce_widths

        self.local_mem_size = device.max_shared_memory_per_block

        self.compute_units = device.multiprocessor_count
Beispiel #3
0
    def __init__(self, device, stream, mempool):
        """Store the stream, query device limits and pick an allocator.

        :param device: device object queried through ``get_attribute()`` and
            wrapped in ``DeviceData``.
        :param stream: stream to use; when ``None``, ``_recreate_stream`` is
            set to ``True``.
        :param mempool: optional memory pool. When given, it is kept in
            ``_mempool`` and its ``allocate`` is used; otherwise
            ``cuda.mem_alloc`` is used and ``_mempool`` is not set.
        """
        self._stream = stream
        self._recreate_stream = stream is None

        device_data = DeviceData(device)

        # Map each word size (4, 8, 16) to the value align_words() reports.
        self.min_mem_coalesce_width = {
            word_size: device_data.align_words(word_size=word_size)
            for word_size in (4, 8, 16)
        }

        self.num_smem_banks = device_data.smem_granularity

        registers_per_block = device.get_attribute(
            device_attribute.MAX_REGISTERS_PER_BLOCK)
        self.max_registers = registers_per_block

        # Grid limits are stored as 2 ** log2(limit); presumably this rounds
        # to a power of two, depending on how log2 is defined - TODO confirm.
        grid_dim_x = device.get_attribute(device_attribute.MAX_GRID_DIM_X)
        self.max_grid_x = 2 ** log2(grid_dim_x)
        grid_dim_y = device.get_attribute(device_attribute.MAX_GRID_DIM_Y)
        self.max_grid_y = 2 ** log2(grid_dim_y)

        block_dim_x = device.get_attribute(device_attribute.MAX_BLOCK_DIM_X)
        self.max_block_size = block_dim_x

        shared_per_block = device.get_attribute(
            device_attribute.MAX_SHARED_MEMORY_PER_BLOCK)
        self.max_shared_mem = shared_per_block

        if mempool is not None:
            self._mempool = mempool
            self.allocate = mempool.allocate
        else:
            self.allocate = cuda.mem_alloc
Beispiel #4
0
	def __init__(self, device, stream, mempool):
		"""Remember the stream, read device limits and select an allocator.

		:param device: device object queried via ``get_attribute()`` and
			passed to ``DeviceData``.
		:param stream: stream to use; ``_recreate_stream`` records whether
			it was ``None``.
		:param mempool: optional memory pool; if ``None``, allocation falls
			back to ``cuda.mem_alloc`` and ``_mempool`` is left unset.
		"""
		self._stream = stream
		self._recreate_stream = stream is None

		devdata = DeviceData(device)

		# Word size (4, 8, 16) -> value returned by align_words().
		widths = {}
		for word_size in (4, 8, 16):
			widths[word_size] = devdata.align_words(word_size=word_size)
		self.min_mem_coalesce_width = widths

		self.num_smem_banks = devdata.smem_granularity

		# Short alias for the repeated attribute queries below.
		query = device.get_attribute

		self.max_registers = query(device_attribute.MAX_REGISTERS_PER_BLOCK)

		# Stored as 2 ** log2(limit); the effect depends on the log2 helper
		# defined elsewhere - presumably a power-of-two rounding, TODO confirm.
		grid_x_exponent = log2(query(device_attribute.MAX_GRID_DIM_X))
		self.max_grid_x = 2 ** grid_x_exponent
		grid_y_exponent = log2(query(device_attribute.MAX_GRID_DIM_Y))
		self.max_grid_y = 2 ** grid_y_exponent

		self.max_block_size = query(device_attribute.MAX_BLOCK_DIM_X)
		self.max_shared_mem = query(
			device_attribute.MAX_SHARED_MEMORY_PER_BLOCK)

		if mempool is not None:
			self._mempool = mempool
			self.allocate = mempool.allocate
		else:
			self.allocate = cuda.mem_alloc