def bin_number(cls, size):
    """Compute the bin index for *size*.

    The result packs ``bitlog2(size)`` into the bits above
    ``cls.mantissa_bits`` and the ``cls.mantissa_mask`` portion of the
    shifted size into the bits below it.

    :param size: the size to classify.
    :returns: ``bitlog2(size) << cls.mantissa_bits`` OR-ed with the masked,
        shifted size.
    """
    exponent = bitlog2(size)
    mantissa_bits = cls.mantissa_bits

    # Shift size by the distance between its exponent and the mantissa
    # width: right when the exponent is at least as large, left otherwise.
    distance = exponent - mantissa_bits
    if distance >= 0:
        aligned = size >> distance
    else:
        aligned = size << -distance

    # Invariant: for a non-zero size, bit `mantissa_bits` of the shifted
    # value must be set.
    top_bit = 1 << mantissa_bits
    assert not size or (aligned & top_bit) != 0

    mantissa = aligned & cls.mantissa_mask
    return (exponent << mantissa_bits) | mantissa
def get_dev_group_size(device):
    """Pick a power-of-two work-group size for *device*.

    Starts from ``device.max_work_group_size`` (forced to 64 when the device
    name contains ``"RV770"``), caps it by a local-memory-derived limit, and
    rounds the result down to ``2**bitlog2(...)``.

    NOTE(review): ``out_type_size`` is not defined in this block; it is
    presumably supplied by an enclosing scope -- confirm against the caller.
    """
    hw_limit = device.max_work_group_size
    # dirty fix for the RV770 boards
    wg_limit = 64 if "RV770" in device.name else hw_limit

    # compute lmem limit
    from pytools import div_ceil
    lmem_limit = div_ceil(wg_limit, out_type_size)
    capped = min(wg_limit, lmem_limit)

    # round down to power of 2
    from pyopencl.tools import bitlog2
    exponent = bitlog2(capped)
    return 2**exponent
def bin_number(cls, size):
    """Return the bin index that *size* falls into.

    The index is built from two fields: ``bitlog2(size)`` placed above
    ``cls.mantissa_bits``, and the shifted size masked with
    ``cls.mantissa_mask`` placed below it.

    :param size: the size to classify.
    :returns: the combined exponent/mantissa bin index.
    """
    log2_size = bitlog2(size)
    mantissa_bits = cls.mantissa_bits

    # Positive excess: drop low-order bits. Negative excess: pad with zeros.
    excess = log2_size - mantissa_bits
    aligned = size >> excess if excess >= 0 else size << -excess

    # A non-zero size is expected to have bit `mantissa_bits` set once
    # shifted; a zero size is exempt from the check.
    if size:
        assert (aligned & (1 << mantissa_bits)) != 0

    low_bits = aligned & cls.mantissa_mask
    return (log2_size << mantissa_bits) | low_bits
def _is_power_of_2(n):
    """Tell whether *n* is zero or equal to ``2**bitlog2(n)``.

    Zero is deliberately accepted in addition to values that survive the
    ``2**bitlog2(n)`` round trip unchanged.
    """
    from pyopencl.tools import bitlog2

    # Zero short-circuits, so bitlog2 is never called with it.
    is_zero = n == 0
    if is_zero:
        return is_zero

    rounded = 2**bitlog2(n)
    return rounded == n
def _is_power_of_2(n):
    """Return a truthy value when *n* is 0 or ``2 ** bitlog2(n) == n``.

    ``bitlog2`` is only evaluated for non-zero *n*.
    """
    from pyopencl.tools import bitlog2

    zero_check = n == 0
    return zero_check if zero_check else 2 ** bitlog2(n) == n