class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty(default=dtypes.int32)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = Property(dtype=dtypes.StorageType,
                       desc="Storage location",
                       choices=dtypes.StorageType,
                       default=dtypes.StorageType.Default,
                       from_string=lambda x: dtypes.StorageType[x])
    lifetime = Property(dtype=dtypes.AllocationLifetime,
                        desc='Data allocation span',
                        choices=dtypes.AllocationLifetime,
                        default=dtypes.AllocationLifetime.Scope,
                        from_string=lambda x: dtypes.AllocationLifetime[x])
    location = DictProperty(
        key_type=str,
        value_type=symbolic.pystr_to_symbolic,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        # A missing location dictionary is normalized to an empty one.
        self.location = location if location is not None else {}
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        # Every shape entry must be an integer or a symbolic expression.
        allowed = (int, symbolic.SymExpr, symbolic.symbol,
                   symbolic.sympy.Basic)
        if not all(isinstance(dim, allowed) for dim in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        # Serialize all registered properties along with the concrete
        # subclass name, so the descriptor can be reconstructed.
        return {
            "type": type(self).__name__,
            "attributes": serialize.all_properties_to_json(self),
        }

    @property
    def toplevel(self):
        # Any lifetime other than Scope means allocation escapes the scope.
        return self.lifetime is not dtypes.AllocationLifetime.Scope

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns a set of undefined symbols in this data descriptor. """
        syms = set()
        for dim in self.shape:
            if isinstance(dim, sp.Basic):
                syms.update(dim.free_symbols)
        return syms

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        # Scalars and types without an explicit vector length count as 1.
        return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = EnumProperty(dtype=dtypes.StorageType,
                           desc="Storage location",
                           default=dtypes.StorageType.Default)
    lifetime = EnumProperty(dtype=dtypes.AllocationLifetime,
                            desc='Data allocation span',
                            default=dtypes.AllocationLifetime.Scope)
    location = DictProperty(
        key_type=str,
        value_type=str,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        # A missing location dictionary is normalized to an empty one.
        self.location = location if location is not None else {}
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)
        retdict = {"type": type(self).__name__, "attributes": attrs}
        return retdict

    @property
    def toplevel(self):
        # Any lifetime other than Scope means allocation escapes the scope.
        return self.lifetime is not dtypes.AllocationLifetime.Scope

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns a set of undefined symbols in this data descriptor. """
        result = set()
        for s in self.shape:
            if isinstance(s, sp.Basic):
                result |= set(s.free_symbols)
        return result

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        # Scalars and types without an explicit vector length count as 1.
        return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1

    @property
    def ctype(self):
        return self.dtype.ctype

    def strides_from_layout(
        self,
        *dimensions: int,
        alignment: symbolic.SymbolicType = 1,
        only_first_aligned: bool = False,
    ) -> Tuple[Tuple[symbolic.SymbolicType], symbolic.SymbolicType]:
        """
        Returns the absolute strides and total size of this data descriptor,
        according to the given dimension ordering and alignment.
        :param dimensions: A sequence of integers representing a permutation
                           of the descriptor's dimensions.
        :param alignment: Padding (in elements) at the end, ensuring stride
                          is a multiple of this number. 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension is padded
                                   with ``alignment``. Otherwise all dimensions
                                   are.
        :return: A 2-tuple of (tuple of strides, total size).
        :raises ValueError: If ``dimensions`` is not a permutation of the
                            descriptor's dimension indices, or if the
                            alignment is (provably) less than 1.
        """
        # Verify dimensions
        if tuple(sorted(dimensions)) != tuple(range(len(self.shape))):
            raise ValueError('Every dimension must be given and appear once.')
        # NOTE: Comparing with ``== True`` (rather than using truthiness) is
        # deliberate: ``alignment`` may be a symbolic expression, in which
        # case ``alignment < 1`` is an unevaluated sympy relational that must
        # not be treated as an error. ``alignment < 1`` already subsumes
        # negative values, so no separate negativity check is needed (the
        # previous ``(alignment < 0) == True`` clause was redundant).
        if (alignment < 1) == True:
            raise ValueError('Invalid alignment value')

        strides = [1] * len(dimensions)
        total_size = 1
        first = True
        # Walk dimensions from fastest- to slowest-changing, accumulating the
        # (possibly padded) sizes into the running total.
        for dim in dimensions:
            strides[dim] = total_size
            if not only_first_aligned or first:
                # Round the dimension size up to a multiple of ``alignment``.
                dimsize = (((self.shape[dim] + alignment - 1) // alignment) *
                           alignment)
            else:
                dimsize = self.shape[dim]
            total_size *= dimsize
            first = False

        return (tuple(strides), total_size)

    def set_strides_from_layout(self,
                                *dimensions: int,
                                alignment: symbolic.SymbolicType = 1,
                                only_first_aligned: bool = False):
        """
        Sets the absolute strides and total size of this data descriptor,
        according to the given dimension ordering and alignment.
        :param dimensions: A sequence of integers representing a permutation
                           of the descriptor's dimensions.
        :param alignment: Padding (in elements) at the end, ensuring stride
                          is a multiple of this number. 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension is padded
                                   with ``alignment``. Otherwise all dimensions
                                   are.
        """
        strides, totalsize = self.strides_from_layout(
            *dimensions,
            alignment=alignment,
            only_first_aligned=only_first_aligned)
        self.strides = strides
        self.total_size = totalsize
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty()
    shape = ShapeProperty()
    transient = Property(dtype=bool)
    storage = Property(dtype=dace.dtypes.StorageType,
                       desc="Storage location",
                       choices=dace.dtypes.StorageType,
                       default=dace.dtypes.StorageType.Default,
                       from_string=lambda x: dtypes.StorageType[x])
    location = Property(
        dtype=str,  # Dict[str, symbolic]
        desc='Full storage location identifier (e.g., rank, GPU ID)',
        default='')
    toplevel = Property(dtype=bool,
                        desc="Allocate array outside of state",
                        default=False)
    debuginfo = DebugInfoProperty()

    def __init__(self, dtype, shape, transient, storage, location, toplevel,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location
        self.toplevel = toplevel
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation lives in a separate method so that this class can run its
    # own checks without invoking an overriding subclass `validate`.
    def _validate(self):
        # Every shape entry must be an integer or a symbolic expression.
        allowed = (int, symbolic.SymExpr, symbolic.symbol,
                   symbolic.sympy.Basic)
        if not all(isinstance(dim, allowed) for dim in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        # Serialize all registered properties along with the concrete
        # subclass name, so the descriptor can be reconstructed.
        return {
            "type": type(self).__name__,
            "attributes": dace.serialize.all_properties_to_json(self),
        }

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def signature(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'
class SubArray(object):
    """ Sub-arrays describe subsets of Arrays (see `dace::data::Array`) for
        purposes of distributed communication. They are implemented with
        [MPI_Type_create_subarray](https://www.mpich.org/static/docs/v3.2/www3/MPI_Type_create_subarray.html).
        Sub-arrays can be also used for collective scatter/gather
        communication in a process-grid.

        The `shape`, `subshape`, and `dtype` properties correspond to the
        `array_of_sizes`, `array_of_subsizes`, and `oldtype` parameters of
        `MPI_Type_create_subarray`.

        The following properties are used for collective scatter/gather
        communication in a process-grid:

        The `pgrid` property is the name of the process-grid where the data
        will be distributed. The `correspondence` property matches the arrays
        dimensions to the process-grid's dimensions. For example, if one
        wants to distribute a matrix to a 2D process-grid, but tile the
        matrix rows over the grid's columns, then `correspondence = [1, 0]`.
    """

    name = Property(dtype=str, desc="The type's name.")
    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[], desc="The array's shape.")
    subshape = ShapeProperty(default=[], desc="The sub-array's shape.")
    pgrid = Property(
        dtype=str,
        allow_none=True,
        default=None,
        desc="Name of the process-grid where the data are distributed.")
    correspondence = ListProperty(
        int,
        allow_none=True,
        default=None,
        desc="Correspondence of the array's indices to the process grid's "
        "indices.")

    def __init__(self,
                 name: str,
                 dtype: dtypes.typeclass,
                 shape: ShapeType,
                 subshape: ShapeType,
                 pgrid: str = None,
                 correspondence: Sequence[Integral] = None):
        self.name = name
        self.dtype = dtype
        self.shape = shape
        self.subshape = subshape
        self.pgrid = pgrid
        # If no correspondence is given, default to the identity
        # permutation (array dimension i maps to grid dimension i).
        self.correspondence = correspondence or list(range(len(shape)))
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        # Shape and sub-shape entries may be integers or symbolic
        # expressions; the correspondence list must be concrete integers.
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic))
               for s in self.shape):
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic))
               for s in self.subshape):
            raise TypeError(
                'Sub-shape must be a list or tuple of integer values or symbols'
            )
        if any(not isinstance(i, Integral) for i in self.correspondence):
            raise TypeError(
                'Correspondence must be a list or tuple of integer values')
        # All three lists must describe the same number of dimensions.
        if len(self.shape) != len(self.subshape):
            raise ValueError(
                'The dimensionality of the shape and sub-shape must match')
        if len(self.correspondence) != len(self.shape):
            raise ValueError(
                'The dimensionality of the shape and correspondence list must match'
            )
        return True

    def to_json(self):
        # Serialize all registered properties plus the concrete class name.
        attrs = serialize.all_properties_to_json(self)
        retdict = {"type": type(self).__name__, "attributes": attrs}
        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        """ Deserializes a SubArray from its JSON representation. """
        # Create dummy object
        ret = cls('tmp', dtypes.int8, [], [], 'tmp', [])
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # Check validity now
        ret.validate()
        return ret

    def init_code(self):
        """ Outputs MPI allocation/initialization code for the sub-array
            MPI datatype ONLY if the process-grid is set.
            It is assumed that the following variables exist in the SDFG
            program's state:
            - MPI_Datatype {self.name}
            - int* {self.name}_counts
            - int* {self.name}_displs

            These variables are typically added to the program's state
            through a Tasklet, e.g., the Dummy MPI node (for more details,
            check the DaCe MPI library in `dace/libraries/mpi`).
        """
        from dace.libraries.mpi import utils
        if self.pgrid:
            # The generated C++ builds a resized MPI subarray datatype and
            # fills per-rank counts/displacements for scatterv/gatherv over
            # the process grid. NOTE(review): intra-literal line breaks below
            # are reconstructed — the literal's token content is unchanged.
            return f"""
                if (__state->{self.pgrid}_valid) {{
                    int sizes[{len(self.shape)}] = {{{', '.join([str(s) for s in self.shape])}}};
                    int subsizes[{len(self.shape)}] = {{{', '.join([str(s) for s in self.subshape])}}};
                    int corr[{len(self.shape)}] = {{{', '.join([str(i) for i in self.correspondence])}}};

                    int basic_stride = subsizes[{len(self.shape)} - 1];

                    int process_strides[{len(self.shape)}];
                    int block_strides[{len(self.shape)}];
                    int data_strides[{len(self.shape)}];

                    process_strides[{len(self.shape)} - 1] = 1;
                    block_strides[{len(self.shape)} - 1] = subsizes[{len(self.shape)} - 1];
                    data_strides[{len(self.shape)} - 1] = 1;

                    for (auto i = {len(self.shape)} - 2; i >= 0; --i) {{
                        block_strides[i] = block_strides[i+1] * subsizes[i];
                        process_strides[i] = process_strides[i+1] * __state->{self.pgrid}_dims[corr[i+1]];
                        data_strides[i] = block_strides[i] * process_strides[i] / basic_stride;
                    }}

                    MPI_Datatype type;
                    int origin[{len(self.shape)}] = {{{','.join(['0'] * len(self.shape))}}};
                    MPI_Type_create_subarray({len(self.shape)}, sizes, subsizes, origin, MPI_ORDER_C, {utils.MPI_DDT(self.dtype.base_type)}, &type);
                    MPI_Type_create_resized(type, 0, basic_stride*sizeof({self.dtype.ctype}), &__state->{self.name});
                    MPI_Type_commit(&__state->{self.name});

                    __state->{self.name}_counts = new int[__state->{self.pgrid}_size];
                    __state->{self.name}_displs = new int[__state->{self.pgrid}_size];
                    int block_id[{len(self.shape)}] = {{0}};
                    int displ = 0;
                    for (auto i = 0; i < __state->{self.pgrid}_size; ++i) {{
                        __state->{self.name}_counts[i] = 1;
                        __state->{self.name}_displs[i] = displ;
                        int idx = {len(self.shape)} - 1;
                        while (idx >= 0 && block_id[idx] + 1 >= __state->{self.pgrid}_dims[corr[idx]]) {{
                            block_id[idx] = 0;
                            displ -= data_strides[idx] * (__state->{self.pgrid}_dims[corr[idx]] - 1);
                            idx--;
                        }}
                        if (idx >= 0) {{
                            block_id[idx] += 1;
                            displ += data_strides[idx];
                        }} else {{
                            assert(i == __state->{self.pgrid}_size - 1);
                        }}
                    }}
                }}
            """
        else:
            return ""

    def exit_code(self):
        """ Outputs MPI deallocation code for the sub-array MPI datatype
            ONLY if the process-grid is set.
        """
        if self.pgrid:
            # Frees the datatype and the counts/displacements buffers that
            # init_code allocated on the program state.
            return f"""
                if (__state->{self.pgrid}_valid) {{
                    delete[] __state->{self.name}_counts;
                    delete[] __state->{self.name}_displs;
                    MPI_Type_free(&__state->{self.name});
                }}
            """
        else:
            return ""