def __init__(self, tpname, fields, declname=None, align_bytes=None,
             aligned_prime_to=[]):
    """Set up a structure declarator with a computed, padded binary layout.

    :arg tpname: name of the structure type.
    :arg fields: list of :class:`Declarator` instances; each one is asked
        for its ``struct_format()`` and ``alignment_requirement()``.
    :arg declname: name used for the declarator.
    :arg align_bytes: integer; the structure size is rounded up to a
        multiple of this value. When omitted, the largest alignment
        requirement among *fields* is used. An explicit value smaller than
        that triggers a warning but is still honored.
    :arg aligned_prime_to: list of integers. The padded size ``s`` is grown
        in steps of *align_bytes* until ``s // align_bytes`` is coprime to
        every entry. (Needed for avoiding bank conflicts in CUDA
        programming.)

    The number of trailing padding bytes is passed as the last argument of
    the delegated ``__init__`` call.
    """
    packed_format = "".join(fld.struct_format() for fld in fields)
    unpadded_size = _struct.calcsize(packed_format)

    natural_align_bytes = max(fld.alignment_requirement() for fld in fields)
    if align_bytes is None:
        align_bytes = natural_align_bytes
    elif align_bytes < natural_align_bytes:
        from warnings import warn
        warn("requested struct alignment smaller than natural alignment")

    self.align_bytes = align_bytes

    def is_coprime_to_all(candidate):
        from pymbolic.algorithm import gcd
        return all(gcd(candidate, other) == 1 for other in aligned_prime_to)

    # Round the raw size up to the next multiple of align_bytes.
    size_in_units = (unpadded_size + align_bytes - 1) // align_bytes
    padded_size = size_in_units * align_bytes

    # Keep adding whole alignment units until the unit count is coprime to
    # every requested number.
    while True:
        if is_coprime_to_all(padded_size // align_bytes):
            break
        padded_size += align_bytes

    self.get_init().__init__(
        self, tpname, fields, declname, padded_size - unpadded_size)

    # self.pad_bytes is presumably set by the __init__ call above -- confirm
    # against the base class.
    if not self.pad_bytes:
        self.format = packed_format
        self.bytes = unpadded_size
    else:
        # "x" is a pad byte in struct format strings.
        self.format = packed_format + "%dx" % self.pad_bytes
        self.bytes = padded_size

    assert _struct.calcsize(self.format) == self.bytes
def __init__(self, tpname, fields, declname=None, align_bytes=None,
             aligned_prime_to=[]):
    """Set up a structure declarator with a computed, padded binary layout.

    :arg tpname: name of the structure type.
    :arg fields: list of :class:`Declarator` instances; each one is asked
        for its ``struct_format()`` and ``alignment_requirement()``.
    :arg declname: name used for the declarator.
    :arg align_bytes: integer; the structure size is rounded up to a
        multiple of this value. When omitted, the largest alignment
        requirement among *fields* is used. An explicit value smaller than
        that triggers a warning but is still honored.
    :arg aligned_prime_to: list of integers. The padded size ``s`` is grown
        in steps of *align_bytes* until ``s // align_bytes`` is coprime to
        every entry. (Needed for avoiding bank conflicts in CUDA
        programming.)

    The number of trailing padding bytes is passed as the last argument of
    ``Struct.__init__``.
    """
    packed_format = "".join(fld.struct_format() for fld in fields)
    unpadded_size = _struct.calcsize(packed_format)

    natural_align_bytes = max(fld.alignment_requirement() for fld in fields)
    if align_bytes is None:
        align_bytes = natural_align_bytes
    elif align_bytes < natural_align_bytes:
        from warnings import warn
        warn("requested struct alignment smaller than natural alignment")

    self.align_bytes = align_bytes

    def is_coprime_to_all(candidate):
        from pymbolic.algorithm import gcd
        return all(gcd(candidate, other) == 1 for other in aligned_prime_to)

    # Round the raw size up to the next multiple of align_bytes.
    size_in_units = (unpadded_size + align_bytes - 1) // align_bytes
    padded_size = size_in_units * align_bytes

    # Keep adding whole alignment units until the unit count is coprime to
    # every requested number.
    while True:
        if is_coprime_to_all(padded_size // align_bytes):
            break
        padded_size += align_bytes

    Struct.__init__(
        self, tpname, fields, declname, padded_size - unpadded_size)

    # self.pad_bytes is presumably set by Struct.__init__ -- confirm against
    # the base class.
    if not self.pad_bytes:
        self.format = packed_format
        self.bytes = unpadded_size
    else:
        # "x" is a pad byte in struct format strings.
        self.format = packed_format + "%dx" % self.pad_bytes
        self.bytes = padded_size

    assert _struct.calcsize(self.format) == self.bytes
def function_prepare_pre_v4(func, arg_types, block=None, shared=None,
                            texrefs=None):
    """Record launch settings and the packed argument format on *func*.

    :arg func: the function object to prepare; it is modified in place.
    :arg arg_types: iterable with one entry per argument. An entry may be

        * a numpy scalar type (a subclass of ``numpy.number``), encoded
          with that dtype's type character,
        * a string, appended to the format verbatim, or
        * anything else, encoded with the type character of
          ``numpy.intp`` (or ``"P"``, the struct code for a native
          pointer, when numpy is unavailable).
    :arg block: deprecated. If given, it is unpacked into
        ``func._set_block_shape`` after a :class:`DeprecationWarning`.
    :arg shared: deprecated. If given, it is passed to
        ``func._set_shared_size`` after a :class:`DeprecationWarning`.
    :arg texrefs: texture references stored as ``func.texrefs``. Defaults
        to a new empty list.
    :returns: *func*, with ``texrefs`` and ``arg_format`` set and its
        parameter size configured.
    """
    from warnings import warn

    if block is not None:
        warn("setting the block size in Function.prepare is deprecated",
             DeprecationWarning, stacklevel=2)
        func._set_block_shape(*block)

    if shared is not None:
        warn("setting the shared memory size in Function.prepare is deprecated",
             DeprecationWarning, stacklevel=2)
        func._set_shared_size(shared)

    # Use a fresh list per call: a shared ``[]`` default would be aliased by
    # every function prepared without explicit texture references.
    func.texrefs = [] if texrefs is None else texrefs

    # The first branch below tolerates ``np is None``; make the fallback
    # branch tolerate it too instead of failing with AttributeError.
    pointer_code = "P" if np is None else np.dtype(np.intp).char

    format_codes = []
    for arg_type in arg_types:
        if (np is not None and isinstance(arg_type, type)
                and issubclass(arg_type, np.number)):
            format_codes.append(np.dtype(arg_type).char)
        elif isinstance(arg_type, str):
            format_codes.append(arg_type)
        else:
            format_codes.append(pointer_code)
    func.arg_format = "".join(format_codes)

    from pycuda._pvt_struct import calcsize
    func._param_set_size(calcsize(func.arg_format))

    return func
def function_prepare(func, arg_types, block, shared=None, texrefs=None):
    """Record launch settings and the packed argument format on *func*.

    :arg func: the function object to prepare; it is modified in place.
    :arg arg_types: iterable with one entry per argument. An entry may be

        * a numpy scalar type (a subclass of ``numpy.number``), encoded
          with that dtype's type character,
        * a string, appended to the format verbatim, or
        * anything else, encoded with the type character of
          ``numpy.intp`` (or ``"P"``, the struct code for a native
          pointer, when numpy cannot be imported).
    :arg block: sequence that is unpacked into ``func.set_block_shape``.
    :arg shared: if not ``None``, passed to ``func.set_shared_size``.
    :arg texrefs: texture references stored as ``func.texrefs``. Defaults
        to a new empty list.
    :returns: *func*, with ``texrefs`` and ``arg_format`` set and its
        parameter size configured.
    """
    func.set_block_shape(*block)

    if shared is not None:
        func.set_shared_size(shared)

    # Use a fresh list per call: a shared ``[]`` default would be aliased by
    # every function prepared without explicit texture references.
    func.texrefs = [] if texrefs is None else texrefs

    try:
        import numpy
    except ImportError:
        numpy = None

    # numpy is optional here; the fallback branch must not dereference it
    # when the import failed.
    pointer_code = "P" if numpy is None else numpy.dtype(numpy.intp).char

    format_codes = []
    for arg_type in arg_types:
        if (numpy is not None and isinstance(arg_type, type)
                and issubclass(arg_type, numpy.number)):
            format_codes.append(numpy.dtype(arg_type).char)
        elif isinstance(arg_type, str):
            format_codes.append(arg_type)
        else:
            format_codes.append(pointer_code)
    func.arg_format = "".join(format_codes)

    from pycuda._pvt_struct import calcsize
    func.param_set_size(calcsize(func.arg_format))

    return func
def function_prepare(func, arg_types, block, shared=None, texrefs=None):
    """Record launch settings and the packed argument format on *func*.

    :arg func: the function object to prepare; it is modified in place.
    :arg arg_types: iterable with one entry per argument. An entry may be

        * a numpy scalar type (a subclass of ``numpy.number``), encoded
          with that dtype's type character,
        * a string, appended to the format verbatim, or
        * anything else, encoded with the type character of
          ``numpy.intp`` (or ``"P"``, the struct code for a native
          pointer, when numpy cannot be imported).
    :arg block: sequence that is unpacked into ``func.set_block_shape``.
    :arg shared: if not ``None``, passed to ``func.set_shared_size``.
    :arg texrefs: texture references stored as ``func.texrefs``. Defaults
        to a new empty list.
    :returns: *func*, with ``texrefs`` and ``arg_format`` set and its
        parameter size configured.
    """
    func.set_block_shape(*block)

    if shared is not None:
        func.set_shared_size(shared)

    # Use a fresh list per call: a shared ``[]`` default would be aliased by
    # every function prepared without explicit texture references.
    func.texrefs = [] if texrefs is None else texrefs

    try:
        import numpy
    except ImportError:
        numpy = None

    # numpy is optional here; the fallback branch must not dereference it
    # when the import failed.
    pointer_code = "P" if numpy is None else numpy.dtype(numpy.intp).char

    format_codes = []
    for arg_type in arg_types:
        if (numpy is not None and isinstance(arg_type, type)
                and issubclass(arg_type, numpy.number)):
            format_codes.append(numpy.dtype(arg_type).char)
        elif isinstance(arg_type, str):
            format_codes.append(arg_type)
        else:
            format_codes.append(pointer_code)
    func.arg_format = "".join(format_codes)

    from pycuda._pvt_struct import calcsize
    func.param_set_size(calcsize(func.arg_format))

    return func
def is_64_bit_platform():
    """Return whether the native C ``long`` type is eight bytes wide.

    NOTE(review): this measures ``long``, not the pointer size; the two may
    differ on some platforms -- confirm that callers want the ``long`` width.
    """
    native_long_size = _struct.calcsize('l')
    return native_long_size == 8
def alignment_requirement(self):
    """Return the byte size of this object's struct format.

    The value is ``calcsize`` applied to ``self.struct_format()``; it is
    used as the alignment requirement.
    """
    own_format = self.struct_format()
    return _struct.calcsize(own_format)
def is64bit():
    """Return whether the native C ``long`` type is eight bytes wide.

    :returns: ``True`` if ``long`` occupies 8 bytes, ``False`` otherwise.
    """
    # The format must be 'l' (lowercase letter L, native long). The digit
    # '1' is a bare repeat count with no type code, for which calcsize
    # raises struct.error instead of returning a size.
    return _struct.calcsize('l') == 8
def is_long_64_bit():
    """Return whether the native C ``long`` type is eight bytes wide."""
    native_long_size = _struct.calcsize('l')
    return native_long_size == 8