def cdef(self):
    """
    Build the C declaration (a ``struct``) of the profiler data structure.

    Returns
    -------
    cgen.Struct
        A struct named ``profiler`` holding one ``double`` member per entry
        of ``self._sections``; each member is named after the entry's
        ``name`` attribute.
    """
    members = []
    for section in self._sections:
        members.append(Value('double', section.name))
    return Struct('profiler', members)
def _C_typedecl(self):
    """
    C type declaration of this object.

    Returns
    -------
    cgen.Struct or None
        ``None`` unless ``self._is_composite_dtype`` is true; otherwise a
        struct named ``self.pname`` with one member per ``(name, ctype)``
        pair in ``self.pfields``.
    """
    if not self._is_composite_dtype:
        return None

    struct_name = self.pname
    members = [Value(ctypes_to_cstr(ctype), name) for name, ctype in self.pfields]
    return Struct(struct_name, members)
def _C_typedecl(self):
    """
    C type declaration of this object.

    Every ``(name, ctype)`` pair in ``self.pfields`` becomes a struct member.
    The member whose name equals ``self._field_flag`` is declared
    ``volatile`` and wrapped in an initializer with value ``1``.

    Returns
    -------
    cgen.Struct
        A struct named ``self.pname``.
    """
    def declare(name, ctype):
        # The flag field gets the `volatile` qualifier and an initial value
        if name == self._field_flag:
            return Initializer(Value('volatile %s' % ctypes_to_cstr(ctype), name), 1)
        return Value(ctypes_to_cstr(ctype), name)

    members = [declare(name, ctype) for name, ctype in self.pfields]
    return Struct(self.pname, members)
def make_superblocks(devdata, struct_name, single_item, multi_item, extra_fields=None):
    """
    Pack per-block data into equally sized, aligned "superblocks" and upload
    the result to the device.

    Parameters
    ----------
    devdata
        Object providing ``align(size)``, used to round up the superblock size.
    struct_name : str
        Name of the generated C struct describing one superblock.
    single_item : list
        ``[([block1, block2, ...], decl), ...]`` -- one data string per block,
        together with the declarator of the matching struct member.
    multi_item : list
        ``[([[item1, item2, ...], ...], decl), ...]`` -- a list of item strings
        per block; the matching struct member is an array of ``decl``.
    extra_fields : dict, optional
        Additional keyword fields stored on the returned record.

    Returns
    -------
    SuperblockedDataStructure
        A record with the fields ``struct``, ``device_memory``,
        ``block_bytes`` and ``data``, plus everything in ``extra_fields``.
    """
    from hedge.backends.cuda.tools import pad_and_join
    from pytools import single_valued
    from cgen import Struct, ArrayOf

    # A `{}` default would be one dict shared by every call; use a sentinel
    if extra_fields is None:
        extra_fields = {}

    # single_item = [([ block1, block2, ... ], decl), ...]
    # multi_item = [([ [ item1, item2, ...], ... ], decl), ...]

    # Concatenate the items of each block, then find the widest block per part
    multi_blocks = [
            ["".join(s) for s in part_data]
            for part_data, part_decls in multi_item]
    block_sizes = [
            max(len(b) for b in part_blocks)
            for part_blocks in multi_blocks]

    # All single-item parts must agree on the number of blocks
    block_count = single_valued(
            len(si_part_blocks) for si_part_blocks, si_part_decl in single_item)

    struct_members = []
    for part_data, part_decl in single_item:
        assert block_count == len(part_data)
        # Called for its check only: all blocks of a part have the same length
        single_valued(len(block) for block in part_data)
        struct_members.append(part_decl)

    for part_data, part_decl in multi_item:
        struct_members.append(
                ArrayOf(part_decl, max(len(s) for s in part_data)))

    superblocks = []
    for superblock_num in range(block_count):
        data = ""
        for part_data, part_decl in single_item:
            data += part_data[superblock_num]

        # Multi-item parts are padded to the widest block of their part
        for part_blocks, part_size in zip(multi_blocks, block_sizes):
            assert block_count == len(part_blocks)
            data += pad(part_blocks[superblock_num], part_size)

        superblocks.append(data)

    superblock_size = devdata.align(
            single_valued(len(sb) for sb in superblocks))

    data = pad_and_join(superblocks, superblock_size)
    assert len(data) == superblock_size*block_count

    class SuperblockedDataStructure(Record):
        pass

    return SuperblockedDataStructure(
            struct=Struct(struct_name, struct_members),
            device_memory=cuda.to_device(data),
            block_bytes=superblock_size,
            data=data,
            **extra_fields
            )
def _C_neighbours(self):
    """
    Build the objects used to access the neighborhood of a given rank.

    One ``int`` field is created for every (dimension, side) pair, with side
    in ``[LEFT, RIGHT]``; the field is named by concatenating the string
    forms of the dimension and the side, and is filled from
    ``self.neighbours[dimension][side]``.

    Returns
    -------
    CNeighbours
        A namedtuple ``(ctype, cdef, obj)`` carrying the ctypes type, the
        :class:`cgen.Struct` declaration, and the populated object.
    """
    pairs = list(product(self.dimensions, [LEFT, RIGHT]))
    names = ['%s%s' % (dim, side) for dim, side in pairs]

    fields = [(name, c_int) for name in names]
    obj = CompositeObject('nb', 'neighbours', Structure, fields)

    # Populate the underlying ctypes structure, one field per pair
    for name, (dim, side) in zip(names, pairs):
        setattr(obj.value._obj, name, self.neighbours[dim][side])

    cdef = Struct('neighbours', [Value('int', name) for name in names])

    CNeighbours = namedtuple('CNeighbours', 'ctype cdef obj')
    return CNeighbours(obj.dtype, cdef, obj)
def test_cgen():
    """Smoke test: build a struct, a function body and a template, then print them."""
    struct_members = [
        POD(np.float32, "h"),
        POD(np.float32, "order"),
        POD(np.float32, "face_jacobian"),
        ArrayOf(POD(np.float32, "normal"), 17),
        POD(np.uint16, "a_base"),
        POD(np.uint16, "b_base"),
        POD(np.uint8, "b_ilist_number"),
        POD(np.uint8, "bdry_flux_number"),  # 0 if not on boundary
        POD(np.uint8, "reserved"),
        POD(np.uint32, "b_global_base"),
    ]
    s = Struct("yuck", struct_members)

    f_args = [
        POD(np.uint8, "reserved"),
        POD(np.uint32, "b_global_base"),
    ]
    f_decl = FunctionDeclaration(POD(np.uint16, "get_num"), f_args)

    # if (a > b) a = b; else { a = b-1; }
    branch = If("a > b", Assign("a", "b"), Block([Assign("a", "b-1")]))
    loop = For("i = 0", "i < 17", "++i", branch)
    f_body = FunctionBody(
        f_decl,
        Block([
            POD(np.uint32, "i"),
            loop,
            Comment("all done"),
        ]))

    scan_decl = FunctionDeclaration(
        Value('CUdeviceptr', 'scan'),
        [Value('CUdeviceptr', 'inputPtr'), Value('int', 'length')])
    t_decl = Template('typename T', scan_decl)

    for generated in (s, f_body, t_decl):
        print(generated)
def get_elwise_module_descriptor(arguments, operation, name="kernel"):
    """
    Describe a Boost.Python module that applies ``operation`` element-wise.

    The generated function is called ``name`` and takes a ``codepy_length``
    and an ``arg_struct``. Its body declares one iterator per vector
    argument and one local per scalar argument, then runs ``operation`` as
    the single statement of a loop over ``codepy_length`` elements.

    Parameters
    ----------
    arguments : sequence
        Kernel arguments; instances of ``VectorArg`` and ``ScalarArg`` are
        unpacked in the body, and every argument contributes a member to
        ``arg_struct`` through its ``declarator()``.
    operation : str
        The C++ statement executed for each index ``i``.
    name : str, optional
        Name of the generated function.

    Returns
    -------
    codepy.bpl.BoostPythonModule
        The module description (not yet compiled).
    """
    from codepy.bpl import BoostPythonModule
    from cgen import (Block, For, FunctionBody, FunctionDeclaration, Include,
                      Initializer, Line, POD, Statement, Struct, Value)

    module = BoostPythonModule()
    module.add_to_preamble([Include("pyublas/numpy.hpp")])
    module.add_to_module([
        Statement("namespace ublas = boost::numeric::ublas"),
        Statement("using namespace pyublas"),
        Line(),
    ])

    # One iterator per vector argument, pointing at the start of its array
    vector_setup = []
    for varg in arguments:
        if not isinstance(varg, VectorArg):
            continue
        iterator_type = "numpy_array<{} >::iterator".format(
            dtype_to_ctype(varg.dtype))
        vector_setup.append(
            Initializer(Value(iterator_type, varg.name),
                        f"args.{varg.name}_ary.begin()"))

    # Scalars are copied out of the argument struct into locals
    scalar_setup = []
    for sarg in arguments:
        if isinstance(sarg, ScalarArg):
            scalar_setup.append(
                Initializer(sarg.declarator(), f"args.{sarg.name}"))

    body = Block(vector_setup + scalar_setup)
    element_loop = For("unsigned i = 0", "i < codepy_length", "++i",
                       Block([Statement(operation)]))
    body.extend([Line(), element_loop])

    arg_struct = Struct("arg_struct", [arg.declarator() for arg in arguments])
    module.add_struct(arg_struct, "ArgStruct")
    module.add_to_module([Line()])

    signature = FunctionDeclaration(
        Value("void", name),
        [POD(numpy.uintp, "codepy_length"), Value("arg_struct", "args")])
    module.add_function(FunctionBody(signature, body))

    return module
def _C_typedecl(self):
    """
    C type declaration of this object, with grouped member declarations.

    Consecutive entries of ``self.pfields`` whose names start with the same
    character are merged into a single declaration, for readability::

        struct neighborhood        struct neighborhood
        {                          {
          int ll;                    int ll, lc, lr;
          int lc;          VS        ...
          int lr;                  }
          ...
        }

    This override generates the form on the right. The type used for a
    merged declaration is that of the first field of the group.
    """
    members = []
    for _, run in groupby(self.pfields, key=lambda field: field[0][0]):
        # Split the run of (name, ctype) pairs into names and ctypes
        names, field_types = zip(*run)
        members.append(Value(ctypes_to_cstr(field_types[0]), ', '.join(names)))
    return Struct(self.pname, members)
def _C_typedecl(self):
    """
    C type declaration of this object.

    Returns
    -------
    cgen.Struct
        A struct named ``self.pname`` with one member per ``(name, ctype)``
        pair in ``self.pfields``.
    """
    struct_name = self.pname
    members = []
    for name, ctype in self.pfields:
        members.append(Value(ctypes_to_cstr(ctype), name))
    return Struct(struct_name, members)
class DiscreteFunction(AbstractCachedFunction, ArgProvider):

    """
    Symbol representing a discrete array in symbolic equations. Unlike an
    Array, a DiscreteFunction carries data.

    Notes
    -----
    Users should not instantiate this class directly. Use Function or
    SparseFunction (or their subclasses) instead.
    """

    # Required by SymPy, otherwise the presence of __getitem__ will make SymPy
    # think that a DiscreteFunction is actually iterable, thus breaking many of
    # its key routines (e.g., solve)
    _iterable = False

    is_Input = True
    is_DiscreteFunction = True
    is_Tensor = True

    def __init__(self, *args, **kwargs):
        """
        Set up grid, distributor, staggering, coefficients and data metadata.

        Nothing is done if ``self._cached()`` reports that the object was
        already initialized. Recognized keyword arguments: ``grid``,
        ``distributor``, ``staggered``, ``coefficients``, ``first_touch``,
        ``allocator`` and ``initializer``.

        Raises
        ------
        ValueError
            If ``coefficients`` is neither ``'standard'`` nor ``'symbolic'``,
            or if ``initializer`` is neither None, a callable, nor a
            ``numpy.ndarray``/list/tuple.
        """
        if not self._cached():
            super(DiscreteFunction, self).__init__(*args, **kwargs)

            # There may or may not be a `Grid` attached to the DiscreteFunction
            self._grid = kwargs.get('grid')

            # A `Distributor` to handle domain decomposition (only relevant for MPI)
            self._distributor = self.__distributor_setup__(**kwargs)

            # Staggering metadata
            self._staggered = self.__staggered_setup__(**kwargs)

            # Symbolic (finite difference) coefficients
            self._coefficients = kwargs.get('coefficients', 'standard')
            if self._coefficients not in ('standard', 'symbolic'):
                raise ValueError("coefficients must be `standard` or `symbolic`")

            # Data-related properties and data initialization
            self._data = None
            self._first_touch = kwargs.get('first_touch', configuration['first-touch'])
            self._allocator = kwargs.get('allocator', default_allocator())
            initializer = kwargs.get('initializer')
            if initializer is None or callable(initializer):
                # Initialization postponed until the first access to .data
                self._initializer = initializer
            elif isinstance(initializer, (np.ndarray, list, tuple)):
                # Allocate memory and initialize it. Note that we do *not* hold
                # a reference to the user-provided buffer
                self._initializer = None
                # `len` rather than truthiness: the truth value of a
                # multi-element ndarray is ambiguous
                if len(initializer) > 0:
                    self.data_with_halo[:] = initializer
                else:
                    # This is a corner case -- we might get here, for example, when
                    # running with MPI and some processes get 0-size arrays after
                    # domain decomposition. We touch the data anyway to avoid the
                    # case ``self._data is None``
                    self.data
            else:
                raise ValueError("`initializer` must be callable or buffer, not %s"
                                 % type(initializer))

    def _allocate_memory(func):
        """
        Decorator for data accessors: allocate memory as a Data on first use.

        If ``self._data`` is still None when the wrapped accessor is called,
        the Data is allocated and then initialized, either through the
        user-provided callable initializer or by filling it with zeros.
        """
        @wraps(func)
        def wrapper(self):
            if self._data is None:
                debug("Allocating memory for %s%s" % (self.name, self.shape_allocated))
                self._data = Data(self.shape_allocated, self.dtype,
                                  modulo=self._mask_modulo, allocator=self._allocator)
                if self._first_touch:
                    assign(self, 0)
                if callable(self._initializer):
                    if self._first_touch:
                        warning("`first touch` together with `initializer` causing "
                                "redundant data initialization")
                    try:
                        self._initializer(self.data_with_halo)
                    except ValueError:
                        # Perhaps user only wants to initialise the physical domain
                        self._initializer(self.data)
                else:
                    self.data_with_halo.fill(0)
            return func(self)
        return wrapper

    @classmethod
    def __dtype_setup__(cls, **kwargs):
        """
        Pick the data type: an explicit ``dtype`` wins, then the ``grid``
        dtype, and ``np.float32`` is the fallback.
        """
        grid = kwargs.get('grid')
        dtype = kwargs.get('dtype')
        if dtype is not None:
            return dtype
        elif grid is not None:
            return grid.dtype
        else:
            return np.float32

    def __staggered_setup__(self, **kwargs):
        """
        Setup staggering-related metadata. This method assigns:

            * 0 to non-staggered dimensions;
            * 1 to staggered dimensions;
            * -1 to dimensions ``d`` for which ``-d`` was given.

        It also sets ``self.is_Staggered`` to whether a ``staggered`` keyword
        argument was provided.
        """
        staggered = kwargs.get('staggered')
        if staggered is None:
            self.is_Staggered = False
            return tuple(0 for _ in self.indices)
        else:
            self.is_Staggered = True
            if staggered is NODE:
                staggered = ()
            elif staggered is CELL:
                staggered = self.indices
            else:
                staggered = as_tuple(staggered)
            mask = []
            for d in self.indices:
                if d in staggered:
                    mask.append(1)
                elif -d in staggered:
                    mask.append(-1)
                else:
                    mask.append(0)
            return tuple(mask)

    def __distributor_setup__(self, **kwargs):
        """Return the Distributor of the ``grid`` if given, else ``distributor``."""
        grid = kwargs.get('grid')
        # There may or may not be a `Distributor`. In the latter case, the
        # DiscreteFunction is to be considered "local" to each MPI rank
        return kwargs.get('distributor') if grid is None else grid.distributor

    @cached_property
    def _functions(self):
        return {self.function}

    @property
    def _data_buffer(self):
        """
        Reference to the data. Unlike :attr:`data` and :attr:`data_with_halo`,
        this *never* returns a view of the data. This method is for internal
        use only.
        """
        return self._data_allocated

    @property
    def _data_alignment(self):
        return self._allocator.guaranteed_alignment

    @property
    def _mem_external(self):
        return True

    @property
    def grid(self):
        """The Grid on which the discretization occurred."""
        return self._grid

    @property
    def staggered(self):
        """The staggering mask computed by ``__staggered_setup__``."""
        return self._staggered

    @property
    def coefficients(self):
        """Form of the coefficients of the function."""
        return self._coefficients

    @cached_property
    def _coeff_symbol(self):
        if self.coefficients == 'symbolic':
            return sympy.Function('W')
        else:
            raise ValueError("Function was not declared with symbolic "
                             "coefficients.")

    @cached_property
    def shape(self):
        """
        Shape of the domain region. The domain constitutes the area of the
        data written to by an Operator.

        Notes
        -----
        In an MPI context, this is the *local* domain region shape.
        """
        return self.shape_domain

    @cached_property
    def shape_domain(self):
        """
        Shape of the domain region. The domain constitutes the area of the
        data written to by an Operator.

        Notes
        -----
        In an MPI context, this is the *local* domain region shape.
        Alias to ``self.shape``.
        """
        return tuple(i - j for i, j in zip(self._shape, self.staggered))

    @cached_property
    def shape_with_halo(self):
        """
        Shape of the domain+outhalo region. The outhalo is the region
        surrounding the domain that may be read by an Operator.

        Notes
        -----
        In an MPI context, this is the *local* with_halo region shape.
        Further, note that the outhalo of inner ranks is typically empty, while
        the outhalo of boundary ranks contains a number of elements depending
        on the rank position in the decomposed grid (corner, side, ...).
        """
        return tuple(j + i + k for i, (j, k) in zip(self.shape_domain,
                                                    self._size_outhalo))

    _shape_with_outhalo = shape_with_halo

    @cached_property
    def _shape_with_inhalo(self):
        """
        Shape of the domain+inhalo region. The inhalo region comprises the
        outhalo as well as any additional "ghost" layers for MPI halo
        exchanges. Data in the inhalo region are exchanged when running
        Operators to maintain consistent values as in sequential runs.

        Notes
        -----
        Typically, this property won't be used in user code, but it may come
        in handy for testing or debugging
        """
        return tuple(j + i + k for i, (j, k) in zip(self.shape_domain, self._halo))

    @cached_property
    def shape_allocated(self):
        """
        Shape of the allocated data. It includes the domain and inhalo regions,
        as well as any additional padding surrounding the halo.

        Notes
        -----
        In an MPI context, this is the *local* allocated shape.
        """
        return tuple(j + i + k for i, (j, k) in zip(self._shape_with_inhalo,
                                                    self._padding))

    @cached_property
    def shape_global(self):
        """
        Global shape of the domain region. The domain constitutes the area of
        the data written to by an Operator.

        Notes
        -----
        In an MPI context, this is the *global* domain region shape, which is
        therefore identical on all MPI ranks.
        """
        if self.grid is None:
            return self.shape
        retval = []
        for d, s in zip(self.dimensions, self.shape):
            # Dimensions unknown to the grid keep their local size
            size = self.grid.dimension_map.get(d)
            retval.append(size.glb if size is not None else s)
        return tuple(retval)

    _offset_inhalo = AbstractCachedFunction._offset_halo
    _size_inhalo = AbstractCachedFunction._size_halo

    @cached_property
    def _size_outhalo(self):
        """Number of points in the outer halo region."""
        if self._distributor is None:
            return self._size_inhalo

        left = [self._distributor.glb_to_loc(d, i, LEFT, strict=False)
                for d, i in zip(self.dimensions, self._size_inhalo.left)]
        right = [self._distributor.glb_to_loc(d, i, RIGHT, strict=False)
                 for d, i in zip(self.dimensions, self._size_inhalo.right)]

        Size = namedtuple('Size', 'left right')
        sizes = tuple(Size(i, j) for i, j in zip(left, right))

        return EnrichedTuple(*sizes, getters=self.dimensions, left=left, right=right)

    @cached_property
    def _mask_modulo(self):
        """Boolean mask telling which Dimensions support modulo-indexing."""
        return tuple(bool(i.is_Stepping) for i in self.dimensions)

    @cached_property
    def _mask_domain(self):
        """Slice-based mask to access the domain region of the allocated data."""
        return tuple(slice(i, j) for i, j in
                     zip(self._offset_domain, self._offset_halo.right))

    @cached_property
    def _mask_inhalo(self):
        """Slice-based mask to access the domain+inhalo region of the allocated data."""
        return tuple(slice(i.left, i.right + j.right) for i, j in
                     zip(self._offset_inhalo, self._size_inhalo))

    @cached_property
    def _mask_outhalo(self):
        """Slice-based mask to access the domain+outhalo region of the allocated data."""
        # A falsy `stop` (None or 0) is mapped to None, i.e. an open-ended slice
        return tuple(slice(i.start - j.left, i.stop and i.stop + j.right or None)
                     for i, j in zip(self._mask_domain, self._size_outhalo))

    @cached_property
    def _decomposition(self):
        """
        Tuple of Decomposition objects, representing the domain decomposition.
        None is used as a placeholder for non-decomposed Dimensions.
        """
        if self._distributor is None:
            return (None,)*self.ndim
        mapper = {d: self._distributor.decomposition[d] for d in self._dist_dimensions}
        return tuple(mapper.get(d) for d in self.dimensions)

    @cached_property
    def _decomposition_outhalo(self):
        """
        Tuple of Decomposition objects, representing the domain+outhalo
        decomposition. None is used as a placeholder for non-decomposed Dimensions.
        """
        if self._distributor is None:
            return (None,)*self.ndim
        return tuple(v.reshape(*self._size_inhalo[d]) if v is not None else v
                     for d, v in zip(self.dimensions, self._decomposition))

    @property
    def data(self):
        """
        The domain data values, as a numpy.ndarray.

        Elements are stored in row-major format.

        Notes
        -----
        With this accessor you are claiming that you will modify the values you
        get back. If you only need to look at the values, use
        :meth:`data_ro_domain` instead.
        """
        return self.data_domain

    @property
    @_allocate_memory
    def data_domain(self):
        """
        The domain data values.

        Elements are stored in row-major format.

        Notes
        -----
        Alias to ``self.data``.

        With this accessor you are claiming that you will modify the values you
        get back. If you only need to look at the values, use
        :meth:`data_ro_domain` instead.
        """
        self._is_halo_dirty = True
        return self._data._global(self._mask_domain, self._decomposition)

    @property
    @_allocate_memory
    def data_with_halo(self):
        """
        The domain+outhalo data values.

        Elements are stored in row-major format.

        Notes
        -----
        With this accessor you are claiming that you will modify the values you
        get back. If you only need to look at the values, use
        :meth:`data_ro_with_halo` instead.
        """
        self._is_halo_dirty = True
        self._halo_exchange()
        return self._data._global(self._mask_outhalo, self._decomposition_outhalo)

    _data_with_outhalo = data_with_halo

    @property
    @_allocate_memory
    def _data_with_inhalo(self):
        """
        The domain+inhalo data values.

        Elements are stored in row-major format.

        Notes
        -----
        This accessor does *not* support global indexing.

        With this accessor you are claiming that you will modify the values you
        get back. If you only need to look at the values, use
        :meth:`_data_ro_with_inhalo` instead.

        Typically, this accessor won't be used in user code to set or read data
        values. Instead, it may come in handy for testing or debugging
        """
        self._is_halo_dirty = True
        self._halo_exchange()
        return np.asarray(self._data[self._mask_inhalo])

    @property
    @_allocate_memory
    def _data_allocated(self):
        """
        The allocated data values, that is domain+inhalo+padding.

        Elements are stored in row-major format.

        Notes
        -----
        This accessor does *not* support global indexing.

        With this accessor you are claiming that you will modify the values you
        get back. If you only need to look at the values, use
        :meth:`_data_ro_allocated` instead.

        Typically, this accessor won't be used in user code to set or read data
        values. Instead, it may come in handy for testing or debugging
        """
        self._is_halo_dirty = True
        self._halo_exchange()
        return np.asarray(self._data)

    def _data_in_region(self, region, dim, side):
        """
        The data values in a given region.

        Parameters
        ----------
        region : DataRegion
            The data region of interest (e.g., OWNED, HALO) for which a view
            is produced.
        dim : Dimension
            The dimension of interest.
        side : DataSide
            The side of interest (LEFT, RIGHT).

        Notes
        -----
        This accessor does *not* support global indexing.

        With this accessor you are claiming that you will modify the values you
        get back.

        Typically, this accessor won't be used in user code to set or read
        data values.
        """
        self._is_halo_dirty = True
        # E.g. region=HALO, side=LEFT reads `self._offset_halo[dim].left`
        offset = getattr(getattr(self, '_offset_%s' % region.name)[dim], side.name)
        size = getattr(getattr(self, '_size_%s' % region.name)[dim], side.name)
        # Multidimensional indexing requires a tuple: NumPy deprecated (and
        # later rejected) a list of slices used as an index
        index_array = tuple(slice(offset, offset+size) if d is dim else slice(None)
                            for d in self.dimensions)
        return np.asarray(self._data[index_array])

    @property
    @_allocate_memory
    def data_ro_domain(self):
        """Read-only view of the domain data values."""
        view = self._data._global(self._mask_domain, self._decomposition)
        view.setflags(write=False)
        return view

    @property
    @_allocate_memory
    def data_ro_with_halo(self):
        """Read-only view of the domain+outhalo data values."""
        view = self._data._global(self._mask_outhalo, self._decomposition_outhalo)
        view.setflags(write=False)
        return view

    _data_ro_with_outhalo = data_ro_with_halo

    @property
    @_allocate_memory
    def _data_ro_with_inhalo(self):
        """
        Read-only view of the domain+inhalo data values.

        Notes
        -----
        This accessor does *not* support global indexing.
        """
        view = self._data[self._mask_inhalo]
        view.setflags(write=False)
        return np.asarray(view)

    @property
    @_allocate_memory
    def _data_ro_allocated(self):
        """
        Read-only view of the domain+inhalo+padding data values.

        Notes
        -----
        This accessor does *not* support global indexing.
        """
        # Freeze a fresh view, never `self._data` itself: clearing the
        # writeable flag on the underlying array would make every later
        # writable accessor fail as well
        view = self._data.view(np.ndarray)
        view.setflags(write=False)
        return view

    @cached_property
    def local_indices(self):
        """
        Tuple of slices representing the global indices that logically
        belong to the calling MPI rank.

        Notes
        -----
        Given a Function ``f(x, y)`` with shape ``(nx, ny)``, when *not* using
        MPI this property will return ``(slice(0, nx), slice(0, ny))``. On
        the other hand, when MPI is used, the local ranges depend on the domain
        decomposition, which is carried by ``self.grid``.
        """
        if self._distributor is None:
            return tuple(slice(0, s) for s in self.shape)
        else:
            return tuple(self._distributor.glb_slices.get(d, slice(0, s))
                         for s, d in zip(self.shape, self.dimensions))

    @cached_property
    def space_dimensions(self):
        """Tuple of Dimensions defining the physical space."""
        return tuple(d for d in self.indices if d.is_Space)

    @cached_property
    def _dist_dimensions(self):
        """Tuple of MPI-distributed Dimensions."""
        if self._distributor is None:
            return ()
        return tuple(d for d in self.indices if d in self._distributor.dimensions)

    @property
    def initializer(self):
        """
        The domain+outhalo data as a plain ``numpy.ndarray`` if memory was
        already allocated, otherwise the stored initializer (possibly None).
        """
        if self._data is not None:
            return self.data_with_halo.view(np.ndarray)
        else:
            return self._initializer

    @cached_property
    def symbolic_shape(self):
        """
        The symbolic shape of the object. This includes:

            * the domain, halo, and padding regions. While halo and padding are
              known quantities (integers), the domain size is represented by a symbol.
            * the shifting induced by the ``staggered`` mask.
        """
        symbolic_shape = super(DiscreteFunction, self).symbolic_shape
        ret = tuple(Add(i, -j) for i, j in zip(symbolic_shape, self.staggered))
        return EnrichedTuple(*ret, getters=self.dimensions)

    # Names of the C struct and of its fields; `_C_typedecl` (the C
    # declaration) and `_C_ctype` (the ctypes counterpart) are both built
    # from them and list the fields in the same order
    _C_structname = 'dataobj'
    _C_typename = 'struct %s *' % _C_structname
    _C_field_data = 'data'
    _C_field_size = 'size'
    _C_field_nopad_size = 'npsize'
    _C_field_domain_size = 'dsize'
    _C_field_halo_size = 'hsize'
    _C_field_halo_ofs = 'hofs'
    _C_field_owned_ofs = 'oofs'

    _C_typedecl = Struct(_C_structname,
                         [Value('%srestrict' % ctypes_to_cstr(c_void_p), _C_field_data),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_size),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_nopad_size),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_domain_size),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_halo_size),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_halo_ofs),
                          Value(ctypes_to_cstr(POINTER(c_int)), _C_field_owned_ofs)])

    _C_ctype = POINTER(type(_C_structname, (Structure,),
                            {'_fields_': [(_C_field_data, c_void_p),
                                          (_C_field_size, POINTER(c_int)),
                                          (_C_field_nopad_size, POINTER(c_int)),
                                          (_C_field_domain_size, POINTER(c_int)),
                                          (_C_field_halo_size, POINTER(c_int)),
                                          (_C_field_halo_ofs, POINTER(c_int)),
                                          (_C_field_owned_ofs, POINTER(c_int))]}))

    def _C_make_dataobj(self, data):
        """
        A ctypes object representing the DiscreteFunction that can be passed
        to an Operator.
        """
        dataobj = byref(self._C_ctype._type_())
        dataobj._obj.data = data.ctypes.data_as(c_void_p)
        dataobj._obj.size = (c_int*self.ndim)(*data.shape)
        # MPI-related fields
        dataobj._obj.npsize = (c_int*self.ndim)(*[i - sum(j) for i, j in
                                                  zip(data.shape, self._size_padding)])
        dataobj._obj.dsize = (c_int*self.ndim)(*self._size_domain)
        # Two entries (left, right) per dimension; see `_C_make_index`
        dataobj._obj.hsize = (c_int*(self.ndim*2))(*flatten(self._size_halo))
        dataobj._obj.hofs = (c_int*(self.ndim*2))(*flatten(self._offset_halo))
        dataobj._obj.oofs = (c_int*(self.ndim*2))(*flatten(self._offset_owned))
        return dataobj

    def _C_as_ndarray(self, dataobj):
        """Cast the data carried by a DiscreteFunction dataobj to an ndarray."""
        shape = tuple(dataobj._obj.size[i] for i in range(self.ndim))
        ctype_1d = dtype_to_ctype(self.dtype) * int(reduce(mul, shape))
        buf = cast(dataobj._obj.data, POINTER(ctype_1d)).contents
        return np.frombuffer(buf, dtype=self.dtype).reshape(shape)

    @memoized_meth
    def _C_make_index(self, dim, side=None):
        """
        Index of ``dim`` (and, if given, ``side``) within a dataobj field.

        Without ``side`` this is the position of ``dim`` in
        ``self.dimensions``; with ``side``, two slots per dimension are
        assumed, LEFT first.
        """
        # Depends on how fields are populated in `_C_make_dataobj`
        idx = self.dimensions.index(dim)
        if side is not None:
            idx = idx*2 + (0 if side is LEFT else 1)
        return idx

    @memoized_meth
    def _C_get_field(self, region, dim, side=None):
        """
        Symbolic representation of a given data region.

        Returns
        -------
        RegionMeta
            A namedtuple ``(offset, size)``.

        Raises
        ------
        ValueError
            If ``region`` is not one of DOMAIN, OWNED, HALO, NOPAD, FULL.
        """
        ffp = lambda f, i: FieldFromPointer("%s[%d]" % (f, i), self._C_name)
        if region is DOMAIN:
            offset = ffp(self._C_field_owned_ofs, self._C_make_index(dim, LEFT))
            size = ffp(self._C_field_domain_size, self._C_make_index(dim))
        elif region is OWNED:
            if side is LEFT:
                offset = ffp(self._C_field_owned_ofs, self._C_make_index(dim, LEFT))
                size = ffp(self._C_field_halo_size, self._C_make_index(dim, RIGHT))
            elif side is CENTER:
                # Note: identical to region=HALO, side=CENTER
                offset = ffp(self._C_field_owned_ofs, self._C_make_index(dim, LEFT))
                size = ffp(self._C_field_domain_size, self._C_make_index(dim))
            else:
                offset = ffp(self._C_field_owned_ofs, self._C_make_index(dim, RIGHT))
                size = ffp(self._C_field_halo_size, self._C_make_index(dim, LEFT))
        elif region is HALO:
            if side is LEFT:
                offset = ffp(self._C_field_halo_ofs, self._C_make_index(dim, LEFT))
                size = ffp(self._C_field_halo_size, self._C_make_index(dim, LEFT))
            elif side is CENTER:
                # Note: identical to region=OWNED, side=CENTER
                offset = ffp(self._C_field_owned_ofs, self._C_make_index(dim, LEFT))
                size = ffp(self._C_field_domain_size, self._C_make_index(dim))
            else:
                offset = ffp(self._C_field_halo_ofs, self._C_make_index(dim, RIGHT))
                size = ffp(self._C_field_halo_size, self._C_make_index(dim, RIGHT))
        elif region is NOPAD:
            offset = ffp(self._C_field_halo_ofs, self._C_make_index(dim, LEFT))
            size = ffp(self._C_field_nopad_size, self._C_make_index(dim))
        elif region is FULL:
            offset = 0
            size = ffp(self._C_field_size, self._C_make_index(dim))
        else:
            raise ValueError("Unknown region `%s`" % str(region))

        RegionMeta = namedtuple('RegionMeta', 'offset size')
        return RegionMeta(offset, size)

    def _halo_exchange(self):
        """
        Perform the halo exchange with the neighboring processes.

        Raises
        ------
        RuntimeError
            If running on more than one MPI process without a Distributor,
            or if previous exchanges are still in flight.
        """
        if not MPI.Is_initialized() or MPI.COMM_WORLD.size == 1:
            # Nothing to do
            return
        if MPI.COMM_WORLD.size > 1 and self._distributor is None:
            raise RuntimeError("`%s` cannot perform a halo exchange as it has "
                               "no Grid attached" % self.name)
        if self._in_flight:
            raise RuntimeError("`%s` cannot initiate a halo exchange as previous "
                               "exchanges are still in flight" % self.name)
        # Each Dimension is completed before the next one starts
        for i in self.space_dimensions:
            self.__halo_begin_exchange(i)
            self.__halo_end_exchange(i)
        self._is_halo_dirty = False
        assert not self._in_flight

    def __halo_begin_exchange(self, dim):
        """Begin a halo exchange along a given Dimension."""
        neighborhood = self._distributor.neighborhood
        comm = self._distributor.comm
        for i in [LEFT, RIGHT]:
            neighbor = neighborhood[dim][i]
            owned_region = self._data_in_region(OWNED, dim, i)
            halo_region = self._data_in_region(HALO, dim, i)
            sendbuf = np.ascontiguousarray(owned_region)
            recvbuf = np.ndarray(shape=halo_region.shape, dtype=self.dtype)
            # Receives carry their buffer as payload; sends carry None
            self._in_flight.append((dim, i, recvbuf, comm.Irecv(recvbuf, neighbor)))
            self._in_flight.append((dim, i, None, comm.Isend(sendbuf, neighbor)))

    def __halo_end_exchange(self, dim):
        """End a halo exchange along a given Dimension."""
        # Iterate over a copy, since entries are removed while looping
        for d, i, payload, req in list(self._in_flight):
            if d == dim:
                status = MPI.Status()
                req.Wait(status=status)
                if payload is not None and status.source != MPI.PROC_NULL:
                    # The MPI.Request `req` originated from a `comm.Irecv`
                    # Now need to scatter the data to the right place
                    self._data_in_region(HALO, d, i)[:] = payload
            self._in_flight.remove((d, i, payload, req))

    @property
    def _arg_names(self):
        """Tuple of argument names introduced by this function."""
        return (self.name,)

    @memoized_meth
    def _arg_defaults(self, alias=None):
        """
        A map of default argument values defined by this symbol.

        Parameters
        ----------
        alias : DiscreteFunction, optional
            To bind the argument values to different names.
        """
        key = alias or self
        args = ReducerMap({key.name: self._data_buffer})

        # Collect default dimension arguments from all indices
        for i, s, o in zip(key.indices, self.shape, self.staggered):
            args.update(i._arg_defaults(_min=0, size=s+o))

        # Add MPI-related data structures
        if self.grid is not None:
            args.update(self.grid._arg_defaults())

        return args

    def _arg_values(self, **kwargs):
        """
        A map of argument values after evaluating user input. If no
        user input is provided, return a default value.

        Parameters
        ----------
        **kwargs
            Dictionary of user-provided argument overrides.
        """
        # Add value override for own data if it is provided, otherwise
        # use defaults
        if self.name in kwargs:
            new = kwargs.pop(self.name)
            if isinstance(new, DiscreteFunction):
                # Set new values and re-derive defaults
                values = new._arg_defaults(alias=self).reduce_all()
            else:
                # We've been provided a pure-data replacement (array)
                values = {self.name: new}
                # Add value overrides for all associated dimensions
                for i, s, o in zip(self.indices, new.shape, self.staggered):
                    size = s + o - sum(self._size_nodomain[i])
                    values.update(i._arg_defaults(size=size))
                # Add MPI-related data structures
                if self.grid is not None:
                    values.update(self.grid._arg_defaults())
        else:
            values = self._arg_defaults(alias=self).reduce_all()

        return values

    def _arg_check(self, args, intervals):
        """
        Check that ``args`` contains legal runtime values bound to ``self``.

        A dtype mismatch only triggers a warning.

        Raises
        ------
        InvalidArgument
            If ``args`` has no runtime value for ``self``, or if the number
            of dimensions of that value doesn't match ``self.ndim``. The
            per-dimension checks are delegated to each index.
        """
        if self.name not in args:
            raise InvalidArgument("No runtime value for `%s`" % self.name)
        key = args[self.name]
        if len(key.shape) != self.ndim:
            raise InvalidArgument("Shape %s of runtime value `%s` does not match "
                                  "dimensions %s" %
                                  (key.shape, self.name, self.indices))
        if key.dtype != self.dtype:
            warning("Data type %s of runtime value `%s` does not match the "
                    "Function data type %s" % (key.dtype, self.name, self.dtype))
        for i, s in zip(self.indices, key.shape):
            i._arg_check(args, s, intervals[i])

    def _arg_as_ctype(self, args, alias=None):
        """Map the argument name to the ctypes dataobj built from its runtime value."""
        key = alias or self
        return ReducerMap({key.name: self._C_make_dataobj(args[key.name])})

    # Pickling support
    _pickle_kwargs = AbstractCachedFunction._pickle_kwargs +\
        ['grid', 'staggered', 'initializer']
def get_boundary_flux_mod(fluxes, fvi, discr, dtype):
    """
    Generate and compile the Boost.Python module computing boundary fluxes.

    The module exposes one function, ``gather_flux(fg, args)``, which loops
    over the face pairs of a face group and, for every node of each face,
    writes each flux (scaled by the interior face Jacobian) into the
    matching ``flux<i>_on_faces`` array of ``args``.

    Parameters
    ----------
    fluxes : sequence
        The fluxes to generate code for; ``flux.op.flux`` of each entry is
        turned into a C++ expression by ``flux_to_code``.
    fvi
        Provides ``arg_names``, the names of the input arrays in ``args``.
    discr
        Passed on to ``get_flux_toolchain``.
    dtype
        Scalar type behind the ``value_type`` typedef of the module.

    Returns
    -------
    The compiled module, as returned by ``mod.compile``.
    """
    from cgen import \
            FunctionDeclaration, FunctionBody, Typedef, Struct, \
            Const, Reference, Value, POD, MaybeUnused, \
            Statement, Include, Line, Block, Initializer, Assign, \
            CustomLoop, For
    from pytools import to_uncomplex_dtype
    from codepy.bpl import BoostPythonModule
    from pymbolic.mapper.stringifier import PREC_PRODUCT

    flux_numbers = range(len(fluxes))
    sides = ["int_side", "ext_side"]

    mod = BoostPythonModule()
    mod.add_to_preamble([
        Include("cstdlib"),
        Include("algorithm"),
        Line(),
        Include("boost/foreach.hpp"),
        Line(),
        Include("hedge/face_operators.hpp"),
        ])
    mod.add_to_module([
        Statement("using namespace hedge"),
        Statement("using namespace pyublas"),
        Line(),
        Typedef(POD(dtype, "value_type")),
        Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

    # Argument struct: output flux arrays first, then the input arrays
    struct_members = [
        Value("numpy_array<value_type>", "flux%d_on_faces" % i)
        for i in flux_numbers]
    struct_members += [
        Value("numpy_array<value_type>", arg_name)
        for arg_name in fvi.arg_names]
    arg_struct = Struct("arg_struct", struct_members)

    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    fdecl = FunctionDeclaration(
        Value("void", "gather_flux"),
        [
            Const(Reference(Value("face_group<face_pair<straight_face> >", "fg"))),
            Reference(Value("arg_struct", "args")),
        ])

    def gen_flux_code():
        f2cm = FluxToCodeMapper()

        # Built first: generating the flux expressions is what fills
        # `f2cm.cse_name_list`, read below
        assignments = []
        for flux_idx, flux in enumerate(fluxes):
            flux_expr = flux_to_code(
                f2cm, False, flux_idx, fvi, flux.op.flux, PREC_PRODUCT)
            assignments.append(
                Assign("fof%d_it[loc_fof_base+i]" % flux_idx,
                       "uncomplex_type(fp.int_side.face_jacobian) * " + flux_expr))

        cse_decls = [
            Initializer(Value("value_type", cse_name), cse_str)
            for cse_name, cse_str in f2cm.cse_name_list]
        return cse_decls + assignments

    # Iterators over the output and input arrays
    iterator_decls = []
    for i in flux_numbers:
        iterator_decls.append(Initializer(
            Const(Value("numpy_array<value_type>::iterator", "fof%d_it" % i)),
            "args.flux%d_on_faces.begin()" % i))
    for arg_name in fvi.arg_names:
        iterator_decls.append(Initializer(
            Const(Value("numpy_array<value_type>::const_iterator",
                        "%s_it" % arg_name)),
            "args.%s.begin()" % arg_name))

    # Per face pair: element base index and index list of both sides
    face_pair_code = []
    for where in sides:
        face_pair_code.extend([
            Initializer(Value("node_number_t", "%s_ebi" % where),
                        "fp.%s.el_base_index" % where),
            Initializer(Value("index_lists_t::const_iterator",
                              "%s_idx_list" % where),
                        "fg.index_list(fp.%s.face_index_list_number)" % where),
            Line(),
            ])

    # Per face node: node index on both sides, then the flux code
    node_code = [
        Initializer(
            MaybeUnused(Value("node_number_t", "%s_idx" % where)),
            "%(where)s_ebi + %(where)s_idx_list[i]" % {"where": where})
        for where in sides]
    node_code += gen_flux_code()

    face_pair_code += [
        Line(),
        Initializer(Value("node_number_t", "loc_fof_base"),
                    "fg.face_length()*(fp.%(where)s.local_el_number*fg.face_count"
                    " + fp.%(where)s.face_id)" % {"where": "int_side"}),
        Line(),
        For("unsigned i = 0", "i < fg.face_length()", "++i", Block(node_code)),
        ]

    face_pair_loop = CustomLoop(
        "BOOST_FOREACH(const face_pair<straight_face> &fp, fg.face_pairs)",
        Block(face_pair_code))

    fbody = Block(iterator_decls + [Line(), face_pair_loop])
    mod.add_function(FunctionBody(fdecl, fbody))

    return mod.compile(get_flux_toolchain(discr, fluxes))
def create_native(self):
    """
    Generate, compile and load a native copy of ``config.parameters``.

    A C struct ``Parameters`` is generated with one member per supported
    entry of ``config.parameters`` (``int``, ``float``, or a non-empty list
    whose first element is an ``int`` or a ``float``), together with the
    C++ code extracting each value from a Python dict. Both are rendered
    into a Boost.Python module named ``NativeParameters``, which is compiled
    with nvcc, loaded, and finally invoked as
    ``copy_dict(config.parameters)``.

    Entries of any other type, as well as empty lists, are skipped.
    """
    from cgen import (ArrayOf, POD, Block, For, Statement, Struct)
    from cgen import dtype_to_ctype
    import numpy

    # `numpy.int` was merely an alias of the builtin `int` and has been
    # removed from NumPy; the builtin yields the same dtype
    int_type = int

    members = []
    code = []

    # `.items()` and `print(...)` below behave the same on Python 2 and 3
    for pk, pv in config.parameters.items():
        if isinstance(pv, int):
            members.append(POD(int_type, pk))
            code.append(
                Statement("params.%s = extract<%s>(cppdict[\"%s\"])"
                          % (pk, dtype_to_ctype(int_type), pk)))
        elif isinstance(pv, float):
            members.append(POD(numpy.float64, pk))
            code.append(
                Statement("params.%s = extract<%s>(cppdict[\"%s\"])"
                          % (pk, dtype_to_ctype(numpy.float64), pk)))
        elif isinstance(pv, list) and pv:
            # The element type is inferred from the first element, so an
            # empty list carries no type information and is skipped
            if isinstance(pv[0], int):
                members.append(ArrayOf(POD(int_type, pk), len(pv)))
                code.append(
                    Block([
                        Statement("list v = extract<%s>(cppdict[\"%s\"])"
                                  % (list.__name__, pk)),
                        For(
                            "unsigned int i = 0",
                            "i<len(v)",
                            "++i",
                            Statement("params.%s[i] = extract<%s>(v[i])"
                                      % (pk, dtype_to_ctype(int_type)))),
                    ]))
            elif isinstance(pv[0], float):
                members.append(ArrayOf(POD(numpy.float64, pk), len(pv)))
                code.append(
                    Block([
                        Statement("list v = extract<%s>(cppdict[\"%s\"])"
                                  % (list.__name__, pk)),
                        For(
                            "unsigned int i = 0",
                            "i < len(v)",
                            "++i",
                            Block([
                                Statement(
                                    "params.%s[i] = extract<%s>(v[i])"
                                    % (pk, dtype_to_ctype(numpy.float64))),
                                # Emitted commented out, as a debugging aid
                                Statement(
                                    "//std::cout << params.%s[i] << std::endl"
                                    % (pk))
                            ])),
                    ]))

    mystruct = Struct('Parameters', members)
    mycode = Block(code)

    from jinja2 import Template

    tpl = Template("""
#include <boost/python.hpp>
#include <boost/python/object.hpp>
#include <boost/python/extract.hpp>
#include <boost/python/list.hpp>
#include <boost/python/dict.hpp>
#include <boost/python/str.hpp>
#include <stdexcept>
#include <iostream>

{{my_struct}}
Parameters params;

void CopyDictionary(boost::python::object pydict)
{
    using namespace boost::python;
    extract< dict > cppdict_ext(pydict);
    if(!cppdict_ext.check()){
        throw std::runtime_error(
            "PassObj::pass_dict: type error: not a python dict.");
    }

    dict cppdict = cppdict_ext();
    list keylist = cppdict.keys();

    {{my_extractor}}
}

BOOST_PYTHON_MODULE({{my_module}})
{
    boost::python::def("copy_dict", &CopyDictionary);
}
""")
    rendered_tpl = tpl.render(my_module="NativeParameters",
                              my_extractor=mycode, my_struct=mystruct)

    from codepy.toolchain import NVCCToolchain
    import codepy.toolchain

    # Start from the settings guessed for the running interpreter, then
    # override what nvcc needs
    kwargs = codepy.toolchain._guess_toolchain_kwargs_from_python_config()
    kwargs["cc"] = "nvcc"
    kwargs["cflags"] = ["-m64", "-x", "cu", "-Xcompiler", "-fPIC"]
    kwargs["include_dirs"].append("/usr/local/cuda/include")
    kwargs["defines"] = []
    kwargs["ldflags"] = ["-shared"]
    # NOTE(review): hard-coded interpreter library, replacing the guessed
    # one -- confirm it matches the Python this module is loaded into
    kwargs["libraries"] = ["python2.6"]
    print(kwargs)

    toolchain = NVCCToolchain(**kwargs)
    from codepy.libraries import add_boost_python
    add_boost_python(toolchain)

    from codepy.jit import extension_from_string
    mymod = extension_from_string(toolchain, "NativeParameters", rendered_tpl)
    mymod.copy_dict(config.parameters)
def ctype(self):
    """
    Build the :class:`cgen.Struct` describing the profiler.

    The struct is named ``Profiler.typename`` and holds one ``double``
    member per entry of ``self._timers``, named after that entry.
    """
    struct_name = Profiler.typename
    members = []
    for timer in self._timers:
        members.append(Value('double', timer))
    return Struct(struct_name, members)