예제 #1
0
class Data(object):
    """ Base descriptor for data containers that can be referenced as memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).

        This class only stores the common properties and checks the shape.
        ``is_equivalent`` and ``as_arg`` raise ``NotImplementedError`` here,
        and ``copy`` always raises ``RuntimeError``.
    """

    # Properties common to all descriptors; `to_json` emits them through
    # `serialize.all_properties_to_json`.
    dtype = TypeClassProperty(default=dtypes.int32)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = Property(
        dtype=dtypes.StorageType,
        desc="Storage location",
        choices=dtypes.StorageType,
        default=dtypes.StorageType.Default,
        # Strings are mapped to enumeration members by name.
        from_string=lambda x: dtypes.StorageType[x])
    lifetime = Property(
        dtype=dtypes.AllocationLifetime,
        desc='Data allocation span',
        choices=dtypes.AllocationLifetime,
        default=dtypes.AllocationLifetime.Scope,
        # Strings are mapped to enumeration members by name.
        from_string=lambda x: dtypes.AllocationLifetime[x])
    location = DictProperty(
        key_type=str,
        value_type=symbolic.pystr_to_symbolic,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        """ Stores all properties and then checks the shape.

            ``location`` may be ``None``, in which case an empty dictionary
            is stored instead.
        """
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = {} if location is None else location
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Checks that this descriptor is well-formed.
            Raises an exception on error. """
        self._validate()

    # The checks of this class live in their own method so that this class
    # can run them via `_validate()` without dispatching to a subclass
    # override of `validate`.
    def _validate(self):
        """ Raises ``TypeError`` if any shape entry is neither an integer nor
            a symbolic value; returns ``True`` otherwise. """
        for dim in self.shape:
            if isinstance(dim, (int, symbolic.SymExpr, symbolic.symbol,
                                symbolic.sympy.Basic)):
                continue
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        return True

    def to_json(self):
        """ Returns a dictionary holding the class name under ``"type"`` and
            the serialized properties under ``"attributes"``. """
        attributes = serialize.all_properties_to_json(self)
        return {"type": type(self).__name__, "attributes": attributes}

    @property
    def toplevel(self):
        """ True unless the lifetime is ``AllocationLifetime.Scope``. """
        scope_lifetime = dtypes.AllocationLifetime.Scope
        return self.lifetime is not scope_lifetime

    def copy(self):
        """ Always raises ``RuntimeError``: copying is not supported. """
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors.
            Not implemented in this base class. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """ Returns a string for a C++ function signature (e.g., `int *A`).
            Not implemented in this base class. """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns the union of the free symbols of every shape entry that
            is a ``sp.Basic`` instance; other entries contribute nothing. """
        symbols = set()
        for dim in self.shape:
            if isinstance(dim, sp.Basic):
                symbols.update(dim.free_symbols)
        return symbols

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        """ The ``veclen`` attribute of ``dtype``, or 1 if it has none. """
        return getattr(self.dtype, "veclen", 1)
예제 #2
0
class Data(object):
    """ Base descriptor for data containers that can be referenced as memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).

        This class stores the common properties, checks the shape and offers
        helpers that derive strides from a dimension ordering.
        ``is_equivalent`` and ``as_arg`` raise ``NotImplementedError`` here.
    """

    # Properties common to all descriptors; `to_json` emits them through
    # `serialize.all_properties_to_json`.
    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = EnumProperty(
        dtype=dtypes.StorageType,
        desc="Storage location",
        default=dtypes.StorageType.Default)
    lifetime = EnumProperty(
        dtype=dtypes.AllocationLifetime,
        desc='Data allocation span',
        default=dtypes.AllocationLifetime.Scope)
    location = DictProperty(
        key_type=str,
        value_type=str,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        """ Stores all properties and then checks the shape.

            ``location`` may be ``None``, in which case an empty dictionary
            is stored instead.
        """
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = {} if location is None else location
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Checks that this descriptor is well-formed.
            Raises an exception on error. """
        self._validate()

    # The checks of this class live in their own method so that this class
    # can run them via `_validate()` without dispatching to a subclass
    # override of `validate`.
    def _validate(self):
        """ Raises ``TypeError`` if any shape entry is neither an integer nor
            a symbolic value; returns ``True`` otherwise. """
        shape_is_valid = all(
            isinstance(dim, (int, symbolic.SymExpr, symbolic.symbol,
                             symbolic.sympy.Basic)) for dim in self.shape)
        if not shape_is_valid:
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        return True

    def to_json(self):
        """ Returns a dictionary holding the class name under ``"type"`` and
            the serialized properties under ``"attributes"``. """
        attributes = serialize.all_properties_to_json(self)
        return {"type": type(self).__name__, "attributes": attributes}

    @property
    def toplevel(self):
        """ True unless the lifetime is ``AllocationLifetime.Scope``. """
        scope_lifetime = dtypes.AllocationLifetime.Scope
        return self.lifetime is not scope_lifetime

    def copy(self):
        """ Always raises ``RuntimeError``: copying is not supported. """
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors.
            Not implemented in this base class. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """ Returns a string for a C++ function signature (e.g., `int *A`).
            Not implemented in this base class. """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns the union of the free symbols of every shape entry that
            is a ``sp.Basic`` instance; other entries contribute nothing. """
        symbols = set()
        for dim in self.shape:
            if isinstance(dim, sp.Basic):
                symbols.update(dim.free_symbols)
        return symbols

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        """ The ``veclen`` attribute of ``dtype``, or 1 if it has none. """
        return getattr(self.dtype, "veclen", 1)

    @property
    def ctype(self):
        """ The ``ctype`` attribute of ``dtype``. """
        element_type = self.dtype
        return element_type.ctype

    def strides_from_layout(
        self,
        *dimensions: int,
        alignment: symbolic.SymbolicType = 1,
        only_first_aligned: bool = False,
    ) -> Tuple[Tuple[symbolic.SymbolicType], symbolic.SymbolicType]:
        """
        Computes absolute strides and the total size for this descriptor's
        shape, given a dimension ordering and an alignment.
        :param dimensions: A permutation of the descriptor's dimension
                           indices. The stride of each dimension is the
                           product of the (padded) sizes of the dimensions
                           listed before it.
        :param alignment: Dimension sizes are rounded up to a multiple of
                          this number (in elements). 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension listed
                                   is padded with ``alignment``. Otherwise
                                   all dimensions are.
        :return: A 2-tuple of (tuple of strides, total size).
        :raise ValueError: If ``dimensions`` is not a permutation of all
                           dimension indices, or the alignment is below 1.
        """
        # The given indices must be exactly 0..ndim-1, each appearing once
        ndim = len(self.shape)
        if tuple(sorted(dimensions)) != tuple(range(ndim)):
            raise ValueError('Every dimension must be given and appear once.')
        # NOTE(review): the explicit `== True` presumably keeps the test
        # usable when the comparison result is not a plain bool (symbolic
        # alignment) -- confirm before simplifying.
        below_one = (alignment < 1) == True
        if below_one or (alignment < 0) == True:
            raise ValueError('Invalid alignment value')

        strides = [1] * len(dimensions)
        total_size = 1
        for position, dim in enumerate(dimensions):
            # Everything accumulated so far lies "below" this dimension
            strides[dim] = total_size
            extent = self.shape[dim]
            if position == 0 or not only_first_aligned:
                # Round the extent up to the next multiple of `alignment`
                extent = ((extent + alignment - 1) // alignment) * alignment
            total_size *= extent

        return (tuple(strides), total_size)

    def set_strides_from_layout(self,
                                *dimensions: int,
                                alignment: symbolic.SymbolicType = 1,
                                only_first_aligned: bool = False):
        """
        Computes strides and total size via ``strides_from_layout`` and
        stores them in ``self.strides`` and ``self.total_size``.
        :param dimensions: A permutation of the descriptor's dimension
                           indices.
        :param alignment: Dimension sizes are rounded up to a multiple of
                          this number (in elements). 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension listed
                                   is padded with ``alignment``. Otherwise
                                   all dimensions are.
        """
        layout = self.strides_from_layout(
            *dimensions,
            alignment=alignment,
            only_first_aligned=only_first_aligned)
        self.strides, self.total_size = layout
예제 #3
0
파일: data.py 프로젝트: tbennun/dace
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).

        This base class stores the common properties and checks the shape.
        ``is_equivalent`` and ``signature`` raise ``NotImplementedError``
        here, and ``copy`` always raises ``RuntimeError``.
    """

    # Properties common to all descriptors; `to_json` emits them through
    # `dace.serialize.all_properties_to_json`.
    dtype = TypeClassProperty()
    shape = ShapeProperty()
    transient = Property(dtype=bool)
    storage = Property(
        dtype=dace.dtypes.StorageType,
        desc="Storage location",
        choices=dace.dtypes.StorageType,
        default=dace.dtypes.StorageType.Default,
        # Strings are mapped to enumeration members by name. The enum is
        # reached through `dace.dtypes`, exactly like the other arguments of
        # this property, so the converter needs no additional module-level
        # name.
        from_string=lambda x: dace.dtypes.StorageType[x])
    location = Property(
        dtype=str,  # Dict[str, symbolic]
        desc='Full storage location identifier (e.g., rank, GPU ID)',
        default='')
    toplevel = Property(dtype=bool,
                        desc="Allocate array outside of state",
                        default=False)
    debuginfo = DebugInfoProperty()

    def __init__(self, dtype, shape, transient, storage, location, toplevel,
                 debuginfo):
        """ Stores all properties and then checks the shape.

            :raise TypeError: If a shape entry is neither an integer nor a
                              symbolic value.
        """
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location
        self.toplevel = toplevel
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        """ Raises ``TypeError`` if any shape entry is neither an integer nor
            a symbolic value; returns ``True`` otherwise. """
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        """ Returns a dictionary holding the class name under ``"type"`` and
            the serialized properties under ``"attributes"``. """
        attrs = dace.serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    def copy(self):
        """ Always raises ``RuntimeError``: copying is not supported. """
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors.
            Not implemented in this base class. """
        raise NotImplementedError

    def signature(self, with_types=True, for_call=False, name=None):
        """ Returns a string for a C++ function signature (e.g., `int *A`).
            Not implemented in this base class. """
        raise NotImplementedError

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'
예제 #4
0
class SubArray(object):
    """
    Sub-arrays describe subsets of Arrays (see `dace::data::Array`) for the purpose of distributed communication.
    They are implemented with
    [MPI_Type_create_subarray](https://www.mpich.org/static/docs/v3.2/www3/MPI_Type_create_subarray.html)
    and can also be used for collective scatter/gather communication in a process-grid.

    The `shape`, `subshape`, and `dtype` properties correspond to the `array_of_sizes`, `array_of_subsizes`, and
    `oldtype` parameters of `MPI_Type_create_subarray`.

    Two further properties are used for collective scatter/gather communication in a process-grid:

    `pgrid` is the name of the process-grid over which the data is distributed. `correspondence` matches the array's
    dimensions to the process-grid's dimensions. For example, to distribute a matrix over a 2D process-grid while
    tiling the matrix rows over the grid's columns, use `correspondence = [1, 0]`.
    """

    name = Property(dtype=str, desc="The type's name.")
    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[], desc="The array's shape.")
    subshape = ShapeProperty(default=[], desc="The sub-array's shape.")
    pgrid = Property(
        dtype=str,
        allow_none=True,
        default=None,
        desc="Name of the process-grid where the data are distributed.")
    correspondence = ListProperty(
        int,
        allow_none=True,
        default=None,
        desc="Correspondence of the array's indices to the process grid's "
        "indices.")

    def __init__(self,
                 name: str,
                 dtype: dtypes.typeclass,
                 shape: ShapeType,
                 subshape: ShapeType,
                 pgrid: str = None,
                 correspondence: Sequence[Integral] = None):
        """ Stores all properties and then validates them.

            A missing or empty ``correspondence`` is replaced by the identity
            mapping ``[0, 1, ..., len(shape) - 1]``.
        """
        self.name = name
        self.dtype = dtype
        self.shape = shape
        self.subshape = subshape
        self.pgrid = pgrid
        if not correspondence:
            correspondence = list(range(len(shape)))
        self.correspondence = correspondence
        self._validate()

    def validate(self):
        """ Checks that this object is well-formed.
            Raises an exception on error. """
        self._validate()

    # The checks of this class live in their own method so that this class
    # can run them via `_validate()` without dispatching to a subclass
    # override of `validate`.
    def _validate(self):
        """ Raises ``TypeError`` for entries of the wrong type and
            ``ValueError`` for mismatching dimensionalities; returns ``True``
            otherwise. """
        # Entry types of the shape first, then of the sub-shape
        for label, attribute in (('Shape', 'shape'), ('Sub-shape',
                                                       'subshape')):
            for dim in getattr(self, attribute):
                if not isinstance(dim, (Integral, symbolic.SymExpr,
                                        symbolic.symbol,
                                        symbolic.sympy.Basic)):
                    raise TypeError(
                        f'{label} must be a list or tuple of integer values or symbols'
                    )
        for index in self.correspondence:
            if not isinstance(index, Integral):
                raise TypeError(
                    'Correspondence must be a list or tuple of integer values')
        # All three sequences must describe the same number of dimensions
        ndim = len(self.shape)
        if ndim != len(self.subshape):
            raise ValueError(
                'The dimensionality of the shape and sub-shape must match')
        if len(self.correspondence) != ndim:
            raise ValueError(
                'The dimensionality of the shape and correspondence list must match'
            )
        return True

    def to_json(self):
        """ Returns a dictionary holding the class name under ``"type"`` and
            the serialized properties under ``"attributes"``. """
        attributes = serialize.all_properties_to_json(self)
        return {"type": type(self).__name__, "attributes": attributes}

    @classmethod
    def from_json(cls, json_obj, context=None):
        """ Builds an object from its JSON form: a placeholder instance is
            created, its properties are overwritten from ``json_obj``, and
            the result is validated before being returned. """
        # Placeholder values; every property is replaced right below
        subarray = cls('tmp', dtypes.int8, [], [], 'tmp', [])
        serialize.set_properties_from_json(subarray,
                                           json_obj,
                                           context=context)
        # Only now do the properties hold their real values
        subarray.validate()
        return subarray

    def init_code(self):
        """ Returns the MPI allocation/initialization code for the sub-array MPI datatype, or an empty string if no
            process-grid is set.
            The generated code assumes that the following variables exist in the SDFG program's state (names are
            derived from this object's `name`):
            - MPI_Datatype <name>
            - int* <name>_counts
            - int* <name>_displs

            These variables are typically added to the program's state through a Tasklet, e.g., the Dummy MPI node
            (for more details, check the DaCe MPI library in `dace/libraries/mpi`).
        """
        from dace.libraries.mpi import utils
        if self.pgrid:
            # Values substituted into the code template below
            pgrid = self.pgrid
            name = self.name
            ndim = len(self.shape)
            sizes_init = ', '.join(str(s) for s in self.shape)
            subsizes_init = ', '.join(str(s) for s in self.subshape)
            corr_init = ', '.join(str(i) for i in self.correspondence)
            origin_init = ','.join(['0'] * ndim)
            mpi_dtype = utils.MPI_DDT(self.dtype.base_type)
            ctype = self.dtype.ctype
            return f"""
                if (__state->{pgrid}_valid) {{
                    int sizes[{ndim}] = {{{sizes_init}}};
                    int subsizes[{ndim}] = {{{subsizes_init}}};
                    int corr[{ndim}] = {{{corr_init}}};

                    int basic_stride = subsizes[{ndim} - 1];

                    int process_strides[{ndim}];
                    int block_strides[{ndim}];
                    int data_strides[{ndim}];

                    process_strides[{ndim} - 1] = 1;
                    block_strides[{ndim} - 1] = subsizes[{ndim} - 1];
                    data_strides[{ndim} - 1] = 1;

                    for (auto i = {ndim} - 2; i >= 0; --i) {{
                        block_strides[i] = block_strides[i+1] * subsizes[i];
                        process_strides[i] = process_strides[i+1] * __state->{pgrid}_dims[corr[i+1]];
                        data_strides[i] = block_strides[i] * process_strides[i] / basic_stride;
                    }}

                    MPI_Datatype type;
                    int origin[{ndim}] = {{{origin_init}}};
                    MPI_Type_create_subarray({ndim}, sizes, subsizes, origin, MPI_ORDER_C, {mpi_dtype}, &type);
                    MPI_Type_create_resized(type, 0, basic_stride*sizeof({ctype}), &__state->{name});
                    MPI_Type_commit(&__state->{name});

                    __state->{name}_counts = new int[__state->{pgrid}_size];
                    __state->{name}_displs = new int[__state->{pgrid}_size];
                    int block_id[{ndim}] = {{0}};
                    int displ = 0;
                    for (auto i = 0; i < __state->{pgrid}_size; ++i) {{
                        __state->{name}_counts[i] = 1;
                        __state->{name}_displs[i] = displ;
                        int idx = {ndim} - 1;
                        while (idx >= 0 && block_id[idx] + 1 >= __state->{pgrid}_dims[corr[idx]]) {{
                            block_id[idx] = 0;
                            displ -= data_strides[idx] * (__state->{pgrid}_dims[corr[idx]] - 1);
                            idx--;
                        }}
                        if (idx >= 0) {{ 
                            block_id[idx] += 1;
                            displ += data_strides[idx];
                        }} else {{
                            assert(i == __state->{pgrid}_size - 1);
                        }}
                    }}
                }}
            """
        return ""

    def exit_code(self):
        """ Returns the MPI deallocation code for the sub-array MPI datatype, or an empty string if no process-grid
            is set. """
        if self.pgrid:
            pgrid = self.pgrid
            name = self.name
            return f"""
                if (__state->{pgrid}_valid) {{
                    delete[] __state->{name}_counts;
                    delete[] __state->{name}_displs;
                    MPI_Type_free(&__state->{name});
                }}
            """
        return ""