Python DeviceData.align_words Beispiele

Programmiersprache: Python

Namespace / Paketname: pycuda.tools

Klasse / Typ: DeviceData

Methode / Funktion: align_words

Beispiele auf hotexamples.com: 4

Python DeviceData.align_words – 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für pycuda.tools.DeviceData.align_words, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um uns dabei zu helfen, ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

DeviceData(12)

align_words(2)

Beispiel #1

Datei anzeigen

    def __init__(self, device):
        """Collect the limits and memory parameters of ``device``.

        ``device`` must expose the attributes read below
        (``max_threads_per_block``, ``max_block_dim_{x,y,z}``,
        ``max_grid_dim_{x,y,z}``, ``warp_size``,
        ``max_shared_memory_per_block``, ``multiprocessor_count``) and a
        ``compute_capability()`` method whose first element is compared
        against 2.
        """
        self.api_id = get_id()
        self._device = device

        self.max_work_group_size = device.max_threads_per_block
        self.max_work_item_sizes = [
            device.max_block_dim_x,
            device.max_block_dim_y,
            device.max_block_dim_z,
        ]
        self.max_num_groups = [
            device.max_grid_dim_x,
            device.max_grid_dim_y,
            device.max_grid_dim_z,
        ]

        # The API currently offers no constant for the bank count, so it is
        # chosen from the first component of the compute capability.
        major_capability = device.compute_capability()[0]
        if major_capability < 2:
            self.local_mem_banks = 16
        else:
            self.local_mem_banks = 32

        self.warp_size = device.warp_size

        # Map each word size (4, 8, 16) to what DeviceData.align_words
        # reports for it.
        device_info = DeviceData(device)
        self.min_mem_coalesce_width = {
            word_size: device_info.align_words(word_size=word_size)
            for word_size in [4, 8, 16]
        }
        self.local_mem_size = device.max_shared_memory_per_block

        self.compute_units = device.multiprocessor_count

Beispiel #2

Datei anzeigen

Datei: cuda.py Projekt: fjarri/reikna

    def __init__(self, device):
        """Record work-group limits and memory characteristics of ``device``.

        Every value is copied from ``device`` attributes, except:

        * ``api_id``, which comes from ``get_id()``;
        * ``local_mem_banks``, which is 16 or 32 depending on the first
          element of ``device.compute_capability()``;
        * ``min_mem_coalesce_width``, a dict keyed by word size (4, 8, 16)
          holding the result of ``DeviceData.align_words`` for that size.
        """
        self.api_id = get_id()
        self._device = device

        self.max_work_group_size = device.max_threads_per_block

        block_dims = [
            device.max_block_dim_x, device.max_block_dim_y,
            device.max_block_dim_z]
        self.max_work_item_sizes = block_dims

        grid_dims = [
            device.max_grid_dim_x, device.max_grid_dim_y,
            device.max_grid_dim_z]
        self.max_num_groups = grid_dims

        # No matching constant exists in the API right now, so the bank
        # count is selected from the compute capability instead.
        major = device.compute_capability()[0]
        self.local_mem_banks = 32 if major >= 2 else 16

        self.warp_size = device.warp_size

        helper = DeviceData(device)
        coalesce_widths = {}
        for word_size in [4, 8, 16]:
            coalesce_widths[word_size] = helper.align_words(
                word_size=word_size)
        self.min_mem_coalesce_width = coalesce_widths

        self.local_mem_size = device.max_shared_memory_per_block
        self.compute_units = device.multiprocessor_count

Beispiel #3

Datei anzeigen

    def __init__(self, device, stream, mempool):
        """Cache device parameters and pick the allocation function.

        Args:
            device: object queried through ``get_attribute`` and wrapped in
                ``DeviceData``.
            stream: stored as ``_stream``; when it is ``None``,
                ``_recreate_stream`` is set to ``True``.
            mempool: when ``None``, ``cuda.mem_alloc`` becomes
                ``self.allocate``; otherwise the pool is kept in
                ``_mempool`` and its ``allocate`` method is used.
        """
        self._stream = stream
        self._recreate_stream = stream is None

        device_info = DeviceData(device)

        # Word size (4, 8, 16) -> value reported by align_words for it.
        self.min_mem_coalesce_width = {
            word_size: device_info.align_words(word_size=word_size)
            for word_size in [4, 8, 16]
        }

        self.num_smem_banks = device_info.smem_granularity

        query = device.get_attribute
        self.max_registers = query(device_attribute.MAX_REGISTERS_PER_BLOCK)
        # NOTE(review): presumably log2 is an integer logarithm, so these
        # two limits end up as powers of two -- confirm against log2's
        # definition, which is not visible here.
        self.max_grid_x = 2**log2(query(device_attribute.MAX_GRID_DIM_X))
        self.max_grid_y = 2**log2(query(device_attribute.MAX_GRID_DIM_Y))
        self.max_block_size = query(device_attribute.MAX_BLOCK_DIM_X)
        self.max_shared_mem = query(
            device_attribute.MAX_SHARED_MEMORY_PER_BLOCK)

        if mempool is not None:
            self._mempool = mempool
            self.allocate = mempool.allocate
        else:
            self.allocate = cuda.mem_alloc

Beispiel #4

Datei anzeigen

Datei: cuda.py Projekt: Mistobaan/pyfft

	def __init__(self, device, stream, mempool):
		"""Store the stream, read device limits and select an allocator.

		``stream`` may be ``None``, in which case ``_recreate_stream`` is
		``True``. ``mempool`` may be ``None``, in which case
		``cuda.mem_alloc`` is used as ``self.allocate``; otherwise
		``mempool.allocate`` is used and the pool is kept in ``_mempool``.
		"""
		self._stream = stream
		self._recreate_stream = stream is None

		device_info = DeviceData(device)

		# Map each word size (4, 8, 16) to the align_words result for it.
		widths = {}
		self.min_mem_coalesce_width = widths
		for word_size in [4, 8, 16]:
			widths[word_size] = device_info.align_words(word_size=word_size)

		self.num_smem_banks = device_info.smem_granularity

		self.max_registers = device.get_attribute(
			device_attribute.MAX_REGISTERS_PER_BLOCK)

		# NOTE(review): log2 is defined outside this snippet; presumably it
		# is an integer logarithm, making the grid limits powers of two --
		# confirm.
		grid_x_limit = device.get_attribute(device_attribute.MAX_GRID_DIM_X)
		self.max_grid_x = 2 ** log2(grid_x_limit)
		grid_y_limit = device.get_attribute(device_attribute.MAX_GRID_DIM_Y)
		self.max_grid_y = 2 ** log2(grid_y_limit)

		self.max_block_size = device.get_attribute(
			device_attribute.MAX_BLOCK_DIM_X)
		self.max_shared_mem = device.get_attribute(
			device_attribute.MAX_SHARED_MEMORY_PER_BLOCK)

		if mempool is not None:
			self._mempool = mempool
			self.allocate = mempool.allocate
		else:
			self.allocate = cuda.mem_alloc